% Conventions used in this bibliography:
%   - Citation keys are the numeric record ids of the original export; they are
%     kept unchanged so that existing citations keep resolving.
%   - month uses the standard three-letter BibTeX macros (unquoted); where the
%     exported record also carried a day of the month it is kept in the
%     separate "day" field.
%   - Page ranges are written out in full and use a double hyphen (--).
%   - Titles carry no trailing period (the style supplies punctuation), and
%     words that must keep their capitalisation are brace-protected.
%   - Author names use the "Last, First" form throughout.

@article{398,
  author   = {Wu, Guohong Albert and Terol, Javier and Iba{\~n}ez, Victoria and L{\'o}pez-Garc{\'\i}a, Antonio and P{\'e}rez-Rom{\'a}n, Estela and Borred{\'a}, Carles and Domingo, Concha and Tadeo, Francisco R. and Carbonell-Caballero, Jos{\'e} and Alonso, Roberto and Curk, Franck and Du, Dongliang and Ollitrault, Patrick and Roose, Mikeal L. and Dopazo, Joaqu{\'\i}n and Gmitter, Frederick G. and Rokhsar, Daniel S. and Talon, Manuel},
  title    = {Genomics of the origin and evolution of {Citrus}},
  journal  = {Nature},
  volume   = {554},
  year     = {2018},
  month    = feb,
  day      = {15},
  pages    = {311--316},
  issn     = {1476-4687},
  doi      = {10.1038/nature25447},
  keywords = {Asia, Southeastern, Biodiversity, citrus, Crop Production, Evolution, Molecular, Genetic Speciation, Genome, Plant, Genomics, Haplotypes, Heterozygote, History, Ancient, Human Migration, Hybridization, Genetic, Phylogeny},
  abstract = {The genus Citrus, comprising some of the most widely cultivated fruit crops worldwide, includes an uncertain number of species. Here we describe ten natural citrus species, using genomic, phylogenetic and biogeographic analyses of 60 accessions representing diverse citrus germ plasms, and propose that citrus diversified during the late Miocene epoch through a rapid southeast Asian radiation that correlates with a marked weakening of the monsoons. A second radiation enabled by migration across the Wallace line gave rise to the Australian limes in the early Pliocene epoch. Further identification and analyses of hybrids and admixed genomes provides insights into the genealogy of major commercial cultivars of citrus. Among mandarins and sweet orange, we find an extensive network of relatedness that illuminates the domestication of these groups. Widespread pummelo admixture among these mandarins and its correlation with fruit size and acidity suggests a plausible role of pummelo introgression in the selection of palatable mandarins. This work provides a new evolutionary framework for the genus Citrus.},
}

@article{388,
  author   = {Carbonell-Caballero, Jos{\'e} and Amadoz, Alicia and Alonso, Roberto and Hidalgo, Marta R. and Cubuk, Cankut and Conesa, David and L{\'o}pez-Qu{\'\i}lez, Antonio and Dopazo, Joaqu{\'\i}n},
  title    = {Reference genome assessment from a population scale perspective: an accurate profile of variability and noise},
  journal  = {Bioinformatics},
  volume   = {33},
  year     = {2017},
  month    = nov,
  day      = {15},
  pages    = {3511--3517},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btx482},
  url      = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btx482},
  keywords = {Animals, Genetic Variation, Genome, Genomics, Genotype, Humans, Models, Statistical, Quality Control, Reproducibility of Results, Software},
  abstract = {Motivation: Current plant and animal genomic studies are often based on newly assembled genomes that have not been properly consolidated. In this scenario, misassembled regions can easily lead to false-positive findings. Despite quality control scores are included within genotyping protocols, they are usually employed to evaluate individual sample quality rather than reference sequence reliability. We propose a statistical model that combines quality control scores across samples in order to detect incongruent patterns at every genomic region. Our model is inherently robust since common artifact signals are expected to be shared between independent samples over misassembled regions of the genome.

Results: The reliability of our protocol has been extensively tested through different experiments and organisms with accurate results, improving state-of-the-art methods. Our analysis demonstrates synergistic relations between quality control scores and allelic variability estimators, that improve the detection of misassembled regions, and is able to find strong artifact signals even within the human reference assembly. Furthermore, we demonstrated how our model can be trained to properly rank the confidence of a set of candidate variants obtained from new independent samples.

Availability and implementation: This tool is freely available at http://gitlab.com/carbonell/ces.

Contact: jcarbonell.cipf@gmail.com or joaquin.dopazo@juntadeandalucia.es.

Supplementary information: Supplementary data are available at Bioinformatics online.},
}

@article{438,
  author   = {Salavert, Francisco and Garc{\'\i}a-Alonso, Luz and S{\'a}nchez, Rub{\'e}n and Alonso, Roberto and Bleda, Marta and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
  title    = {Web-based network analysis and visualization using {CellMaps}},
  journal  = {Bioinformatics},
  volume   = {32},
  year     = {2016},
  month    = oct,
  day      = {1},
  pages    = {3041--3043},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btw332},
  keywords = {Biochemical Phenomena, Internet, Software},
  abstract = {UNLABELLED: : CellMaps is an HTML5 open-source web tool that allows displaying, editing, exploring and analyzing biological networks as well as integrating metadata into them. Computations and analyses are remotely executed in high-end servers, and all the functionalities are available through RESTful web services. CellMaps can easily be integrated in any web page by using an available JavaScript API.

AVAILABILITY AND IMPLEMENTATION: The application is available at: http://cellmaps.babelomics.org/ and the code can be found in: https://github.com/opencb/cell-maps The client is implemented in JavaScript and the server in C and Java.

CONTACT: jdopazo@cipf.es

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}

@article{1129,
  author   = {Alonso, Roberto and Salavert, Francisco and Garcia-Garcia, Francisco and Carbonell-Caballero, Jos{\'e} and Bleda, Marta and Garc{\'\i}a-Alonso, Luz and Sanchis-Juan, Alba and Perez-Gil, Daniel and Marin-Garcia, Pablo and S{\'a}nchez, Rub{\'e}n and Cubuk, Cankut and Hidalgo, Marta R. and Amadoz, Alicia and Hernansaiz-Ballesteros, Rosa D. and Alem{\'a}n, Alejandro and T{\'a}rraga, Joaqu{\'\i}n and Montaner, David and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
  title    = {{Babelomics} 5.0: functional interpretation for new generations of genomic data},
  journal  = {Nucleic Acids Research},
  volume   = {43},
  number   = {W1},
  year     = {2015},
  month    = apr,
  day      = {20},
  pages    = {W117--W121},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkv384},
  url      = {http://nar.oxfordjournals.org/content/43/W1/W117},
  keywords = {babelomics, data integration, gene set analysis, interactome, network analysis, NGS, RNA-seq, Systems biology, transcriptomics},
  abstract = {Babelomics has been running for more than one decade offering a user-friendly interface for the functional analysis of gene expression and genomic data. Here we present its fifth release, which includes support for Next Generation Sequencing data including gene expression (RNA-seq), exome or genome resequencing. Babelomics has simplified its interface, being now more intuitive. Improved visualization options, such as a genome viewer as well as an interactive network viewer, have been implemented. New technical enhancements at both, client and server sides, makes the user experience faster and more dynamic. Babelomics offers user-friendly access to a full range of methods that cover: (i) primary data analysis, (ii) a variety of tests for different experimental designs and (iii) different enrichment and network analysis algorithms for the interpretation of the results of such tests in the proper functional context. In addition to the public server, local copies of Babelomics can be downloaded and installed. Babelomics is freely available at: http://www.babelomics.org.},
}

@article{1115,
  author   = {Terol, Javier and Iba{\~n}ez, Victoria and Carbonell, Jos{\'e} and Alonso, Roberto and Estornell, Leandro H. and Licciardello, Concetta and Gut, Ivo G. and Dopazo, Joaqu{\'\i}n and Talon, Manuel},
  title    = {Involvement of a citrus meiotic recombination {TTC-repeat} motif in the formation of gross deletions generated by ionizing radiation and {MULE} activation},
  journal  = {BMC Genomics},
  volume   = {16},
  year     = {2015},
  month    = feb,
  day      = {13},
  pages    = {69},
  issn     = {1471-2164},
  doi      = {10.1186/s12864-015-1280-3},
  url      = {http://www.biomedcentral.com/1471-2164/16/69},
  abstract = {BACKGROUND: Transposable-element mediated chromosomal rearrangements require the involvement of two transposons and two double-strand breaks (DSB) located in close proximity. In radiobiology, DSB proximity is also a major factor contributing to rearrangements. However, the whole issue of DSB proximity remains virtually unexplored. RESULTS: Based on DNA sequencing analysis we show that the genomes of 2 derived mutations, Arrufatina (sport) and Nero (irradiation), share a similar 2 Mb deletion of chromosome 3. A 7 kb Mutator-like element found in Clemenules was present in Arrufatina in inverted orientation flanking the 5{\textquoteright} end of the deletion. The Arrufatina Mule displayed "dissimilar" 9-bp target site duplications separated by 2 Mb. Fine-scale single nucleotide variant analyses of the deleted fragments identified a TTC-repeat sequence motif located in the center of the deletion responsible of a meiotic crossover detected in the citrus reference genome. CONCLUSIONS: Taken together, this information is compatible with the proposal that in both mutants, the TTC-repeat motif formed a triplex DNA structure generating a loop that brought in close proximity the originally distinct reactive ends. In Arrufatina, the loop brought the Mule ends nearby the 2 distinct insertion target sites and the inverted insertion of the transposable element between these target sites provoked the release of the in-between fragment. This proposal requires the involvement of a unique transposon and sheds light on the unresolved question of how two distinct sites become located in close proximity. These observations confer a crucial role to the TTC-repeats in fundamental plant processes as meiotic recombination and chromosomal rearrangements.},
}

% NOTE(review): record 563 describes the same article as record 1115 (identical
% DOI). Both keys are kept so that citations of either one still resolve;
% consider merging them once the citing documents have been checked.
@article{563,
  author   = {Terol, Javier and Iba{\~n}ez, Victoria and Carbonell, Jos{\'e} and Alonso, Roberto and Estornell, Leandro H. and Licciardello, Concetta and Gut, Ivo G. and Dopazo, Joaqu{\'\i}n and Talon, Manuel},
  title    = {Involvement of a citrus meiotic recombination {TTC-repeat} motif in the formation of gross deletions generated by ionizing radiation and {MULE} activation},
  journal  = {BMC Genomics},
  volume   = {16},
  year     = {2015},
  month    = feb,
  pages    = {69},
  issn     = {1471-2164},
  doi      = {10.1186/s12864-015-1280-3},
  url      = {https://doi.org/10.1186/s12864-015-1280-3},
  abstract = {Transposable-element mediated chromosomal rearrangements require the involvement of two transposons and two double-strand breaks (DSB) located in close proximity. In radiobiology, DSB proximity is also a major factor contributing to rearrangements. However, the whole issue of DSB proximity remains virtually unexplored.},
}

@article{1121,
  author   = {Carbonell-Caballero, Jos{\'e} and Alonso, Roberto and Iba{\~n}ez, Victoria and Terol, Javier and Talon, Manuel and Dopazo, Joaqu{\'\i}n},
  title    = {A phylogenetic analysis of 34 chloroplast genomes elucidates the relationships between wild and domestic species within the genus {Citrus}},
  journal  = {Molecular Biology and Evolution},
  volume   = {32},
  number   = {8},
  year     = {2015},
  month    = apr,
  day      = {14},
  pages    = {2015--2035},
  issn     = {1537-1719},
  doi      = {10.1093/molbev/msv082},
  url      = {http://mbe.oxfordjournals.org/content/early/2015/04/27/molbev.msv082.full},
  keywords = {chloroplast, citrus, Phylogeny, WGS},
  abstract = {Citrus genus includes some of the most important cultivated fruit trees worldwide. Despite being extensively studied because of its commercial relevance, the origin of cultivated citrus species and the history of its domestication still remain an open question. Here we present a phylogenetic analysis of the chloroplast genomes of 34 citrus genotypes which constitutes the most comprehensive and detailed study to date on the evolution and variability of the genus Citrus. A statistical model was used to estimate divergence times between the major citrus groups. Additionally, a complete map of the variability across the genome of different citrus species was produced, including single nucleotide variants, heteroplasmic positions, indels and large structural variants. The distribution of all these variants provided further independent support to the phylogeny obtained. An unexpected finding was the high level of heteroplasmy found in several of the analysed genomes. The use of the complete chloroplast DNA not only paves the way for a better understanding of the phylogenetic relationships within the Citrus genus, but also provides original insights into other elusive evolutionary processes such as chloroplast inheritance, heteroplasmy and gene selection.},
}

@article{1003,
  author   = {Medina, Ignacio and Salavert, Francisco and S{\'a}nchez, Rub{\'e}n and De Maria, Alejandro and Alonso, Roberto and Escobar, Pablo and Bleda, Marta and Dopazo, Joaqu{\'\i}n},
  title    = {{Genome Maps}, a new generation genome browser},
  journal  = {Nucleic Acids Research},
  volume   = {41},
  number   = {W1},
  year     = {2013},
  month    = jun,
  day      = {8},
  pages    = {W41--W46},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkt530},
  url      = {http://nar.oxfordjournals.org/content/41/W1/W41},
  keywords = {BAM, genome viewer, HTML5, javascript, Next Generation Sequencing, NGS, SVG, VCF},
  abstract = {Genome browsers have gained importance as more genomes and related genomic information become available. However, the increase of information brought about by new generation sequencing technologies is, at the same time, causing a subtle but continuous decrease in the efficiency of conventional genome browsers. Here, we present Genome Maps, a genome browser that implements an innovative model of data transfer and management. The program uses highly efficient technologies from the new HTML5 standard, such as scalable vector graphics, that optimize workloads at both server and client sides and ensure future scalability. Thus, data management and representation are entirely carried out by the browser, without the need of any Java Applet, Flash or other plug-in technology installation. Relevant biological data on genes, transcripts, exons, regulatory features, single-nucleotide polymorphisms, karyotype and so forth, are imported from web services and are available as tracks. In addition, several DAS servers are already included in Genome Maps. As a novelty, this web-based genome browser allows the local upload of huge genomic data files (e.g. VCF or BAM) that can be dynamically visualized in real time at the client side, thus facilitating the management of medical data affected by privacy restrictions. Finally, Genome Maps can easily be integrated in any web application by including only a few lines of code. Genome Maps is an open source collaborative initiative available in the GitHub repository (https://github.com/compbio-bigdata-viz/genome-maps). Genome Maps is available at: http://www.genomemaps.org.},
}

@article{512,
  author   = {Garc{\'\i}a-Alonso, Luz and Alonso, Roberto and Vidal, Enrique and Amadoz, Alicia and De Maria, Alejandro and Minguez, Pablo and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
  title    = {Discovering the hidden sub-network component in a ranked list of genes or proteins derived from genomic experiments},
  journal  = {Nucleic Acids Research},
  volume   = {40},
  year     = {2012},
  month    = nov,
  day      = {1},
  pages    = {e158},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks699},
  keywords = {Bipolar Disorder, Fanconi Anemia, Gene Regulatory Networks, Genes, Neoplasm, Genome-Wide Association Study, Genomics, Humans, Protein Interaction Mapping},
  abstract = {Genomic experiments (e.g. differential gene expression, single-nucleotide polymorphism association) typically produce ranked list of genes. We present a simple but powerful approach which uses protein-protein interaction data to detect sub-networks within such ranked lists of genes or proteins. We performed an exhaustive study of network parameters that allowed us concluding that the average number of components and the average number of nodes per component are the parameters that best discriminate between real and random networks. A novel aspect that increases the efficiency of this strategy in finding sub-networks is that, in addition to direct connections, also connections mediated by intermediate nodes are considered to build up the sub-networks. The possibility of using of such intermediate nodes makes this approach more robust to noise. It also overcomes some limitations intrinsic to experimental designs based on differential expression, in which some nodes are invariant across conditions. The proposed approach can also be used for candidate disease-gene prioritization. Here, we demonstrate the usefulness of the approach by means of several case examples that include a differential expression analysis in Fanconi Anemia, a genome-wide association study of bipolar disorder and a genome-scale study of essentiality in cancer genes. An efficient and easy-to-use web interface (available at http://www.babelomics.org) based on HTML5 technologies is also provided to run the algorithm and represent the network.},
}

@article{517,
  author   = {Bleda, Marta and Medina, Ignacio and Alonso, Roberto and De Maria, Alejandro and Salavert, Francisco and Dopazo, Joaqu{\'\i}n},
  title    = {Inferring the regulatory network behind a gene expression experiment},
  journal  = {Nucleic Acids Research},
  volume   = {40},
  year     = {2012},
  month    = jul,
  pages    = {W168--W172},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks573},
  keywords = {Binding Sites, Databases, Genetic, Fanconi Anemia, Gene Regulatory Networks, Internet, MicroRNAs, Software, Transcription Factors, Transcriptome},
  abstract = {Transcription factors (TFs) and miRNAs are the most important dynamic regulators in the control of gene expression in multicellular organisms. These regulatory elements play crucial roles in development, cell cycling and cell signaling, and they have also been associated with many diseases. The Regulatory Network Analysis Tool (RENATO) web server makes the exploration of regulatory networks easy, enabling a better understanding of functional modularity and network integrity under specific perturbations. RENATO is suitable for the analysis of the result of expression profiling experiments. The program analyses lists of genes and search for the regulators compatible with its activation or deactivation. Tests of single enrichment or gene set enrichment allow the selection of the subset of TFs or miRNAs significantly involved in the regulation of the query genes. RENATO also offers an interactive advanced graphical interface that allows exploring the regulatory network found.RENATO is available at: http://renato.bioinfo.cipf.es/.},
}

@article{523,
  author   = {Medina, Ignacio and De Maria, Alejandro and Bleda, Marta and Salavert, Francisco and Alonso, Roberto and Gonzalez, Cristina Y. and Dopazo, Joaqu{\'\i}n},
  title    = {{VARIANT}: Command Line, Web service and Web interface for fast and accurate functional characterization of variants found by Next-Generation Sequencing},
  journal  = {Nucleic Acids Research},
  volume   = {40},
  year     = {2012},
  month    = jul,
  pages    = {W54--W58},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gks572},
  keywords = {Databases, Nucleic Acid, Genetic Variation, High-Throughput Nucleotide Sequencing, Internet, Molecular Sequence Annotation, mutation, Polymorphism, Single Nucleotide, Software, User-Computer Interface},
  abstract = {The massive use of Next-Generation Sequencing (NGS) technologies is uncovering an unexpected amount of variability. The functional characterization of such variability, particularly in the most common form of variation found, the Single Nucleotide Variants (SNVs), has become a priority that needs to be addressed in a systematic way. VARIANT (VARIant ANalyis Tool) reports information on the variants found that include consequence type and annotations taken from different databases and repositories (SNPs and variants from dbSNP and 1000 genomes, and disease-related variants from the Genome-Wide Association Study (GWAS) catalog, Online Mendelian Inheritance in Man (OMIM), Catalog of Somatic Mutations in Cancer (COSMIC) mutations, etc). VARIANT also produces a rich variety of annotations that include information on the regulatory (transcription factor or miRNA-binding sites, etc.) or structural roles, or on the selective pressures on the sites affected by the variation. This information allows extending the conventional reports beyond the coding regions and expands the knowledge on the contribution of non-coding or synonymous variants to the phenotype studied. Contrarily to other tools, VARIANT uses a remote database and operates through efficient RESTful Web Services that optimize search and transaction operations. In this way, local problems of installation, update or disk size limitations are overcome without the need of sacrifice speed (thousands of variants are processed per minute). VARIANT is available at: http://variant.bioinfo.cipf.es.},
}