% Journal-article records. Text outside an entry is ignored by BibTeX, so the
% notes before each record are plain comments.
% Conventions used below: citation keys are unchanged; one field per line;
% page ranges use "--"; month uses the standard three-letter macros; titles
% carry no trailing period and brace-protect words whose case must survive
% style recasing; journal names are written out in full.

% 718 -- month was originally recorded as "2020 12 08".
@article{718,
  author   = {Casimiro-Soriguer, C S and Rigual, M M and Brokate-Llanos, A M
              and Mu{\~n}oz, M J and Garz{\'o}n, A and P{\'e}rez-Pulido, A J
              and Jimenez, J},
  title    = {Using {AnABlast} for intergenic {sORF} prediction in the {Caenorhabditis} elegans genome},
  journal  = {Bioinformatics},
  year     = {2020},
  month    = dec,
  volume   = {36},
  pages    = {4827--4832},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btaa608},
  keywords = {Animals, Caenorhabditis elegans, Computational Biology, Genome, Open Reading Frames, Software},
  abstract = {
MOTIVATION: Short bioactive peptides encoded by small open reading frames (sORFs) play important roles in eukaryotes. Bioinformatics prediction of ORFs is an early step in a genome sequence analysis, but sORFs encoding short peptides, often using non-AUG initiation codons, are not easily discriminated from false ORFs occurring by chance.

RESULTS: AnABlast is a computational tool designed to highlight putative protein-coding regions in genomic DNA sequences. This protein-coding finder is independent of ORF length and reading frame shifts, thus making of AnABlast a potentially useful tool to predict sORFs. Using this algorithm, here, we report the identification of 82 putative new intergenic sORFs in the Caenorhabditis elegans genome. Sequence similarity, motif presence, expression data and RNA interference experiments support that the underlined sORFs likely encode functional peptides, encouraging the use of AnABlast as a new approach for the accurate prediction of intergenic sORFs in annotated eukaryotic genomes.

AVAILABILITY AND IMPLEMENTATION: AnABlast is freely available at http://www.bioinfocabd.upo.es/ab/. The C.elegans genome browser with AnABlast results, annotated genes and all data used in this study is available at http://www.bioinfocabd.upo.es/celegans.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.
},
}

% 388 -- month was originally recorded as "2017 Nov 15".
@article{388,
  author   = {Carbonell-Caballero, Jos{\'e} and Amadoz, Alicia and Alonso, Roberto
              and Hidalgo, Marta R and Cubuk, Cankut and Conesa, David
              and L{\'o}pez-Qu{\'\i}lez, Antonio and Dopazo, Joaquin},
  title    = {Reference genome assessment from a population scale perspective: an accurate profile of variability and noise},
  journal  = {Bioinformatics},
  year     = {2017},
  month    = nov,
  volume   = {33},
  pages    = {3511--3517},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btx482},
  url      = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btx482},
  keywords = {Animals, Genetic Variation, Genome, Genomics, Genotype, Humans, Models, Statistical, Quality Control, Reproducibility of Results, Software},
  abstract = {
Motivation: Current plant and animal genomic studies are often based on newly assembled genomes that have not been properly consolidated. In this scenario, misassembled regions can easily lead to false-positive findings. Despite quality control scores are included within genotyping protocols, they are usually employed to evaluate individual sample quality rather than reference sequence reliability. We propose a statistical model that combines quality control scores across samples in order to detect incongruent patterns at every genomic region. Our model is inherently robust since common artifact signals are expected to be shared between independent samples over misassembled regions of the genome.

Results: The reliability of our protocol has been extensively tested through different experiments and organisms with accurate results, improving state-of-the-art methods. Our analysis demonstrates synergistic relations between quality control scores and allelic variability estimators, that improve the detection of misassembled regions, and is able to find strong artifact signals even within the human reference assembly. Furthermore, we demonstrated how our model can be trained to properly rank the confidence of a set of candidate variants obtained from new independent samples.

Availability and implementation: This tool is freely available at http://gitlab.com/carbonell/ces.

Contact: jcarbonell.cipf@gmail.com or joaquin.dopazo@juntadeandalucia.es.

Supplementary information: Supplementary data are available at Bioinformatics online.
},
}

% 533 -- month was originally recorded as "2011 Sep".
@article{533,
  author   = {Huerta-Cepas, Jaime and Dopazo, Joaquin and Huynen, Martijn A
              and Gabald{\'o}n, Toni},
  title    = {Evidence for short-time divergence and long-time conservation of tissue-specific expression after gene duplication},
  journal  = {Briefings in Bioinformatics},
  year     = {2011},
  month    = sep,
  volume   = {12},
  pages    = {442--448},
  issn     = {1477-4054},
  doi      = {10.1093/bib/bbr022},
  keywords = {Animals, Conserved Sequence, Evolution, Molecular, Gene Duplication, gene expression, Genome, Humans, Mice, Organ Specificity},
  abstract = {
Gene duplication is one of the main mechanisms by which genomes can acquire novel functions. It has been proposed that the retention of gene duplicates can be associated to processes of tissue expression divergence. These models predict that acquisition of divergent expression patterns should be acquired shortly after the duplication, and that larger divergence in tissue expression would be expected for paralogs, as compared to orthologs of a similar age. Many studies have shown that gene duplicates tend to have divergent expression patterns and that gene family expansions are associated with high levels of tissue specificity. However, the timeframe in which these processes occur have rarely been investigated in detail, particularly in vertebrates, and most analyses do not include direct comparisons of orthologs as a baseline for the expected levels of tissue specificity in absence of duplications. To assess the specific contribution of duplications to expression divergence, we combine here phylogenetic analyses and expression data from human and mouse. In particular, we study differences in spatial expression among human-mouse paralogs, specifically duplicated after the radiation of mammals, and compare them to pairs of orthologs in the same species. Our results show that gene duplication leads to increased levels of tissue specificity and that this tends to occur promptly after the duplication event.
},
}

% 536 -- the original month field held only "2011" (a year, not a month), so
% no month is given here. The last author's surname is written with a tilde
% (n-tilde); the original used a caron accent on the n.
@article{536,
  author   = {Gonz{\'a}lez-del Pozo, Mar{\'\i}a and Borrego, Salud
              and Barrag{\'a}n, Isabel and Pieras, Juan I and Santoyo, Javier
              and Matamala, Nerea and Naranjo, Bel{\'e}n and Dopazo, Joaquin
              and Anti{\~n}olo, Guillermo},
  title    = {Mutation screening of multiple genes in {Spanish} patients with autosomal recessive retinitis pigmentosa by targeted resequencing},
  journal  = {PLoS ONE},
  year     = {2011},
  volume   = {6},
  pages    = {e27894},
  issn     = {1932-6203},
  doi      = {10.1371/journal.pone.0027894},
  keywords = {Alleles, DNA Mutational Analysis, Exons, Genetic Variation, Genome, Hispanic or Latino, Humans, Introns, Language, mutation, Mutation, Missense, Oligonucleotide Array Sequence Analysis, Polymerase Chain Reaction, Reproducibility of Results, Retinitis pigmentosa, United States},
  abstract = {
Retinitis Pigmentosa (RP) is a heterogeneous group of inherited retinal dystrophies characterised ultimately by the loss of photoreceptor cells. RP is the leading cause of visual loss in individuals younger than 60 years, with a prevalence of about 1 in 4000. The molecular genetic diagnosis of autosomal recessive RP (arRP) is challenging due to the large genetic and clinical heterogeneity. Traditional methods for sequencing arRP genes are often laborious and not easily available and a screening technique that enables the rapid detection of the genetic cause would be very helpful in the clinical practice. The goal of this study was to develop and apply microarray-based resequencing technology capable of detecting both known and novel mutations on a single high-throughput platform. Hence, the coding regions and exon/intron boundaries of 16 arRP genes were resequenced using microarrays in 102 Spanish patients with clinical diagnosis of arRP. All the detected variations were confirmed by direct sequencing and potential pathogenicity was assessed by functional predictions and frequency in controls. For validation purposes 4 positive controls for variants consisting of previously identified changes were hybridized on the array. As a result of the screening, we detected 44 variants, of which 15 are very likely pathogenic detected in 14 arRP families (14\%). Finally, the design of this array can easily be transformed in an equivalent diagnostic system based on targeted enrichment followed by next generation sequencing.
},
}

% 599 -- month was originally recorded as "2008 May".
@article{599,
  author   = {Saar, Kathrin and Beck, Alfred and Bihoreau, Marie-Th{\'e}r{\`e}se
              and Birney, Ewan and Brocklebank, Denise and Chen, Yuan
              and Cuppen, Edwin and Demonchy, Stephanie and Dopazo, Joaquin
              and Flicek, Paul and Foglio, Mario and Fujiyama, Asao
              and Gut, Ivo G and Gauguier, Dominique and Guig{\'o}, Roderic
              and Guryev, Victor and Heinig, Matthias and Hummel, Oliver
              and Jahn, Niels and Klages, Sven and Kren, Vladimir
              and Kube, Michael and Kuhl, Heiner and Kuramoto, Takashi
              and Kuroki, Yoko and Lechner, Doris and Lee, Young-Ae
              and Lopez-Bigas, Nuria and Lathrop, G Mark and Mashimo, Tomoji
              and Medina, Ignacio and Mott, Richard and Patone, Giannino
              and Perrier-Cornet, Jeanne-Antide and Platzer, Matthias
              and Pravenec, Michal and Reinhardt, Richard and Sakaki, Yoshiyuki
              and Schilhabel, Markus and Schulz, Herbert and Serikawa, Tadao
              and Shikhagaie, Medya and Tatsumoto, Shouji and Taudien, Stefan
              and Toyoda, Atsushi and Voigt, Birger and Zelenika, Diana
              and Zimdahl, Heike and Hubner, Norbert},
  title    = {{SNP} and haplotype mapping for genetic analysis in the rat},
  journal  = {Nature Genetics},
  year     = {2008},
  month    = may,
  volume   = {40},
  pages    = {560--566},
  issn     = {1546-1718},
  doi      = {10.1038/ng.124},
  keywords = {Animals, Chromosome Mapping, Databases, Genetic, Genome, Haplotypes, Linkage Disequilibrium, Phylogeny, Polymorphism, Single Nucleotide, Quantitative Trait Loci, Rats, Rats, Inbred Strains, Recombination, Genetic},
  abstract = {
The laboratory rat is one of the most extensively studied model organisms. Inbred laboratory rat strains originated from limited Rattus norvegicus founder populations, and the inherited genetic variation provides an excellent resource for the correlation of genotype to phenotype. Here, we report a survey of genetic variation based on almost 3 million newly identified SNPs. We obtained accurate and complete genotypes for a subset of 20,238 SNPs across 167 distinct inbred rat strains, two rat recombinant inbred panels and an F2 intercross. Using 81\% of these SNPs, we constructed high-density genetic maps, creating a large dataset of fully characterized SNPs for disease gene mapping. Our data characterize the population structure and illustrate the degree of linkage disequilibrium. We provide a detailed SNP map and demonstrate its utility for mapping of quantitative trait loci. This community resource is openly available and augments the genetic tools for this workhorse of physiological studies.
},
}