% Entry 555: PyCellBase article (BMC Bioinformatics, 2019).
% The exported month string "2019 Mar 28" is split into the month macro and a day field.
@article{555,
  title = {{PyCellBase}, an efficient python package for easy retrieval of biological data from heterogeneous sources},
  journal = {BMC Bioinformatics},
  volume = {20},
  year = {2019},
  month = mar,
  day = {28},
  pages = {159},
  abstract = {

BACKGROUND: Biological databases and repositories are incrementing in diversity and complexity over the years. This rapid expansion of current and new sources of biological knowledge raises serious problems of data accessibility and integration. To handle the growing necessity of unification, CellBase was created as an integrative solution. CellBase provides a centralized NoSQL database containing biological information from different and heterogeneous sources. Access to this information is done through a RESTful web service API, which provides an efficient interface to the data.

RESULTS: In this work we present PyCellBase, a Python package that provides programmatic access to the rich RESTful web service API offered by CellBase. This package offers a fast and user-friendly access to biological information without the need of installing any local database. In addition, a series of command-line tools are provided to perform common bioinformatic tasks, such as variant annotation. CellBase data is always available by a high-availability cluster and queries have been tuned to ensure a real-time performance.

CONCLUSION: PyCellBase is an open-source Python package that provides an efficient access to heterogeneous biological information. It allows to perform tasks that require a comprehensive set of knowledge resources, as for example variant annotation. Queries can be easily fine-tuned to retrieve the desired information of particular biological features. PyCellBase offers the convenience of an object-oriented scripting language and provides the ability to integrate the obtained results into other Python applications and pipelines.

},
  keywords = {Computational Biology, Databases, Factual, Software, User-Computer Interface},
  issn = {1471-2105},
  doi = {10.1186/s12859-019-2726-4},
  author = {Perez-Gil, Daniel and Lopez, Francisco J and Dopazo, Joaquin and Marin-Garcia, Pablo and Rendon, Augusto and Medina, Ignacio},
}

% Entry 387: Human Genome Variation Archive article (Nucleic Acids Research, 2017).
% The exported month string "2017 07 03" is split into the month macro and a day field.
@article{387,
  title = {{HGVA}: the {Human Genome Variation Archive}},
  journal = {Nucleic Acids Research},
  volume = {45},
  year = {2017},
  month = jul,
  day = {3},
  pages = {W189--W194},
  abstract = {

High-profile genomic variation projects like the 1000 Genomes project or the Exome Aggregation Consortium, are generating a wealth of human genomic variation knowledge which can be used as an essential reference for identifying disease-causing genotypes. However, accessing these data, contrasting the various studies and integrating those data in downstream analyses remains cumbersome. The Human Genome Variation Archive (HGVA) tackles these challenges and facilitates access to genomic data for key reference projects in a clean, fast and integrated fashion. HGVA provides an efficient and intuitive web-interface for easy data mining, a comprehensive RESTful API and client libraries in Python, Java and JavaScript for fast programmatic access to its knowledge base. HGVA calculates population frequencies for these projects and enriches their data with variant annotation provided by CellBase, a rich and fast annotation solution. HGVA serves as a proof-of-concept of the genome analysis developments being carried out by the University of Cambridge together with UK{\textquoteright}s 100 000 genomes project and the National Institute for Health Research BioResource Rare-Diseases, in particular, deploying open-source for Computational Biology (OpenCB) software platform for storing and analyzing massive genomic datasets.

},
  keywords = {Genetic Variation, Genome, Human, Humans, Internet, Software, User-Computer Interface},
  issn = {1362-4962},
  doi = {10.1093/nar/gkx445},
  url = {https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkx445},
  author = {Lopez, Javier and Coll, Jacobo and Haimel, Matthias and Kandasamy, Swaathi and T{\'a}rraga, Joaqu{\'\i}n and Furio-Tari, Pedro and Bari, Wasim and Bleda, Marta and Rueda, Antonio and Gr{\"a}f, Stefan and Rendon, Augusto and Dopazo, Joaquin and Medina, Ignacio},
}

% Entry 382: VISMapper web server article (BMC Bioinformatics, 2017).
% The exported month string "2017 Sep 20" is split into the month macro and a day field.
@article{382,
  title = {{VISMapper}: ultra-fast exhaustive cartography of viral insertion sites for gene therapy},
  journal = {BMC Bioinformatics},
  volume = {18},
  year = {2017},
  month = sep,
  day = {20},
  pages = {421},
  abstract = {

BACKGROUND: The possibility of integrating viral vectors to become a persistent part of the host genome makes them a crucial element of clinical gene therapy. However, viral integration has associated risks, such as the unintentional activation of oncogenes that can result in cancer. Therefore, the analysis of integration sites of retroviral vectors is a crucial step in developing safer vectors for therapeutic use.

RESULTS: Here we present VISMapper, a vector integration site analysis web server, to analyze next-generation sequencing data for retroviral vector integration sites. VISMapper can be found at: http://vismapper.babelomics.org .

CONCLUSIONS: Because it uses novel mapping algorithms VISMapper is remarkably faster than previous available programs. It also provides a useful graphical interface to analyze the integration sites found in the genomic context.

},
  keywords = {Base Sequence, Genetic Therapy, Genetic Vectors, High-Throughput Nucleotide Sequencing, Humans, Internet, User-Computer Interface, Virus Integration},
  issn = {1471-2105},
  doi = {10.1186/s12859-017-1837-z},
  author = {Juanes, Jos{\'e} M and Gallego, Asunci{\'o}n and T{\'a}rraga, Joaqu{\'\i}n and Chaves, Felipe J and Marin-Garcia, Pablo and Medina, Ignacio and Arnau, Vicente and Dopazo, Joaquin},
}

% Entry 1203: PathAct web server article (Nucleic Acids Research, 2016).
% NOTE(review): this record carries no volume or pages; fill them in from the publisher record.
@article{1203,
  title = {Actionable pathways: interactive discovery of therapeutic targets using signaling pathway models},
  journal = {Nucleic Acids Research},
  year = {2016},
  month = may,
  day = {2},
  abstract = {The discovery of actionable targets is crucial for targeted therapies and is also a constituent part of the drug discovery process. The success of an intervention over a target depends critically on its contribution, within the complex network of gene interactions, to the cellular processes responsible for disease progression or therapeutic response. Here we present PathAct, a web server that predicts the effect that interventions over genes (inhibitions or activations that simulate knock-outs, drug treatments or over-expressions) can have over signal transmission within signaling pathways and, ultimately, over the cell functionalities triggered by them. PathAct implements an advanced graphical interface that provides a unique interactive working environment in which the suitability of potentially actionable genes, that could eventually become drug targets for personalized or individualized therapies, can be easily tested.
The PathAct tool can be found at: http://pathact.babelomics.org.},
  keywords = {actionable genes, Disease mechanism, drug action mechanism, Drug discovery, pathway analysis, personalized medicine, signalling, therapeutic targets},
  issn = {1362-4962},
  doi = {10.1093/nar/gkw369},
  url = {http://nar.oxfordjournals.org/content/early/2016/05/02/nar.gkw369.full},
  author = {Salavert, Francisco and Hidalgo, Marta R and Amadoz, Alicia and Cubuk, Cankut and Medina, Ignacio and Crespo, Daniel and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaqu{\'\i}n},
}

% Entry 560: same article as entry 1195 (identical DOI); both keys are kept so that
% citations of either key keep resolving.
% NOTE(review): the exported month "Jan-12-2016" was not a valid month value and is omitted.
% The exported url field held two URLs fused together; the first one is kept. The page
% number is taken from entry 1195.
@article{560,
  title = {{HPG} pore: an efficient and scalable framework for nanopore sequencing data},
  journal = {BMC Bioinformatics},
  volume = {17},
  year = {2016},
  pages = {107},
  doi = {10.1186/s12859-016-0966-0},
  url = {http://www.biomedcentral.com/1471-2105/17/107},
  author = {T{\'a}rraga, Joaqu{\'\i}n and Gallego, Asunci{\'o}n and Arnau, Vicente and Medina, Ignacio and Dopazo, Joaquin},
}

% Entry 1195: HPG Pore article with abstract and keywords (BMC Bioinformatics, 2016).
% The exported month field only repeated the year and is omitted.
@article{1195,
  title = {{HPG} pore: an efficient and scalable framework for nanopore sequencing data},
  journal = {BMC Bioinformatics},
  volume = {17},
  year = {2016},
  pages = {107},
  abstract = {BACKGROUND: The use of nanopore technologies is expected to spread in the future because they are portable and can sequence long fragments of DNA molecules without prior amplification. The first nanopore sequencer available, the MinION{\texttrademark} from Oxford Nanopore Technologies, is a USB-connected, portable device that allows real-time DNA analysis. In addition, other new instruments are expected to be released soon, which promise to outperform the current short-read technologies in terms of throughput. Despite the flood of data expected from this technology, the data analysis solutions currently available are only designed to manage small projects and are not scalable. RESULTS: Here we present HPG Pore, a toolkit for exploring and analysing nanopore sequencing data.
HPG Pore can run on both individual computers and in the Hadoop distributed computing framework, which allows easy scale-up to manage the large amounts of data expected to result from extensive use of nanopore technologies in the future. CONCLUSIONS: HPG Pore allows for virtually unlimited sequencing data scalability, thus guaranteeing its continued management in near future scenarios. HPG Pore is available in GitHub at http://github.com/opencb/hpg-pore .},
  keywords = {hadoop, HPC, nanopore, NGS},
  issn = {1471-2105},
  doi = {10.1186/s12859-016-0966-0},
  url = {http://www.biomedcentral.com/1471-2105/17/107},
  author = {T{\'a}rraga, Joaqu{\'\i}n and Gallego, Asunci{\'o}n and Arnau, Vicente and Medina, Ignacio and Dopazo, Joaquin},
}

% Entry 438: CellMaps web tool article (Bioinformatics, 2016).
% The abbreviated page range "3041-3" is written out in full.
@article{438,
  title = {Web-based network analysis and visualization using {CellMaps}},
  journal = {Bioinformatics},
  volume = {32},
  year = {2016},
  month = oct,
  day = {1},
  pages = {3041--3043},
  abstract = {

UNLABELLED: : CellMaps is an HTML5 open-source web tool that allows displaying, editing, exploring and analyzing biological networks as well as integrating metadata into them. Computations and analyses are remotely executed in high-end servers, and all the functionalities are available through RESTful web services. CellMaps can easily be integrated in any web page by using an available JavaScript API.

AVAILABILITY AND IMPLEMENTATION: The application is available at: http://cellmaps.babelomics.org/ and the code can be found in: https://github.com/opencb/cell-maps The client is implemented in JavaScript and the server in C and Java.

CONTACT: jdopazo@cipf.es

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.

},
  keywords = {Biochemical Phenomena, Internet, Software},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btw332},
  author = {Salavert, Francisco and Garc{\'\i}a-Alonso, Luz and S{\'a}nchez, Rub{\'e}n and Alonso, Roberto and Bleda, Marta and Medina, Ignacio and Dopazo, Joaquin},
}

% Entry 1128: PATHiVar web tool article (Nucleic Acids Research, 2015).
@article{1128,
  title = {Assessing the impact of mutations found in next generation sequencing data over human signaling pathways},
  journal = {Nucleic Acids Research},
  volume = {43},
  number = {W1},
  year = {2015},
  month = apr,
  day = {16},
  pages = {W270--W275},
  abstract = {Modern sequencing technologies produce increasingly detailed data on genomic variation. However, conventional methods for relating either individual variants or mutated genes to phenotypes present known limitations given the complex, multigenic nature of many diseases or traits. Here we present PATHiVar, a web-based tool that integrates genomic variation data with gene expression tissue information. PATHiVar constitutes a new generation of genomic data analysis methods that allow studying variants found in next generation sequencing experiment in the context of signaling pathways. Simple Boolean models of pathways provide detailed descriptions of the impact of mutations in cell functionality so as, recurrences in functionality failures can easily be related to diseases, even if they are produced by mutations in different genes. Patterns of changes in signal transmission circuits, often unpredictable from individual genes mutated, correspond to patterns of affected functionalities that can be related to complex traits such as disease progression, drug response, etc.
PATHiVar is available at: http://pathivar.babelomics.org.},
  keywords = {NGS, pathways, signalling, Systems biology},
  issn = {1362-4962},
  doi = {10.1093/nar/gkv349},
  url = {http://nar.oxfordjournals.org/content/43/W1/W270},
  author = {Hernansaiz-Ballesteros, Rosa D and Salavert, Francisco and Sebasti{\'a}n-Leon, Patricia and Alem{\'a}n, Alejandro and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
}

% Entry 1129: Babelomics 5.0 release article (Nucleic Acids Research, 2015).
@article{1129,
  title = {{Babelomics} 5.0: functional interpretation for new generations of genomic data},
  journal = {Nucleic Acids Research},
  volume = {43},
  number = {W1},
  year = {2015},
  month = apr,
  day = {20},
  pages = {W117--W121},
  abstract = {Babelomics has been running for more than one decade offering a user-friendly interface for the functional analysis of gene expression and genomic data. Here we present its fifth release, which includes support for Next Generation Sequencing data including gene expression (RNA-seq), exome or genome resequencing. Babelomics has simplified its interface, being now more intuitive. Improved visualization options, such as a genome viewer as well as an interactive network viewer, have been implemented. New technical enhancements at both, client and server sides, makes the user experience faster and more dynamic. Babelomics offers user-friendly access to a full range of methods that cover: (i) primary data analysis, (ii) a variety of tests for different experimental designs and (iii) different enrichment and network analysis algorithms for the interpretation of the results of such tests in the proper functional context. In addition to the public server, local copies of Babelomics can be downloaded and installed.
Babelomics is freely available at: http://www.babelomics.org.},
  keywords = {babelomics, data integration, gene set analysis, interactome, network analysis, NGS, RNA-seq, Systems biology, transcriptomics},
  issn = {1362-4962},
  doi = {10.1093/nar/gkv384},
  url = {http://nar.oxfordjournals.org/content/43/W1/W117},
  author = {Alonso, Roberto and Salavert, Francisco and Garcia-Garcia, Francisco and Carbonell-Caballero, Jos{\'e} and Bleda, Marta and Garc{\'\i}a-Alonso, Luz and Sanchis-Juan, Alba and Perez-Gil, Daniel and Marin-Garcia, Pablo and S{\'a}nchez, Rub{\'e}n and Cubuk, Cankut and Hidalgo, Marta R and Amadoz, Alicia and Hernansaiz-Ballesteros, Rosa D and Alem{\'a}n, Alejandro and T{\'a}rraga, Joaqu{\'\i}n and Montaner, David and Medina, Ignacio and Dopazo, Joaquin},
}

% Entry 1162: parallel RNA short-read aligner article (IEEE/ACM TCBB, 2015).
% The issue spans two months, so the month field concatenates two month macros.
% NOTE(review): the abstract contains "[Formula: see text]" placeholders left by the
% export; the original wording cannot be recovered from this file.
@article{1162,
  title = {Concurrent and Accurate Short Read Mapping on Multicore Processors},
  journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  volume = {12},
  year = {2015},
  month = sep # "--" # oct,
  pages = {995--1007},
  abstract = {We introduce a parallel aligner with a work-flow organization for fast and accurate mapping of RNA sequences on servers equipped with multicore processors. Our software, [Formula: see text] ([Formula: see text] is an open-source application. The software is available at http://www.opencb.org, exploits a suffix array to rapidly map a large fraction of the RNA fragments (reads), as well as leverages the accuracy of the Smith-Waterman algorithm to deal with conflictive reads. The aligner is enhanced with a careful strategy to detect splice junctions based on an adaptive division of RNA reads into small segments (or seeds), which are then mapped onto a number of candidate alignment locations, providing crucial information for the successful alignment of the complete reads.
The experimental results on a platform with Intel multicore technology report the parallel performance of [Formula: see text], on RNA reads of 100-400 nucleotides, which excels in execution time/sensitivity to state-of-the-art aligners such as TopHat 2+Bowtie 2, MapSplice, and STAR.},
  keywords = {HPC, NGS, short read mapping},
  issn = {1557-9964},
  doi = {10.1109/TCBB.2015.2392077},
  url = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?tp=&arnumber=7010005},
  author = {Martinez, Hector and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Barrachina, Sergio and Castillo, Maribel and Dopazo, Joaquin and Quintana-Orti, Enrique S},
}

% Entry 472: inexact mapping on backward search methods (BMC Bioinformatics, 2015).
@article{472,
  title = {Fast inexact mapping using advanced tree exploration on backward search methods},
  journal = {BMC Bioinformatics},
  volume = {16},
  year = {2015},
  month = jan,
  day = {28},
  pages = {18},
  abstract = {

BACKGROUND: Short sequence mapping methods for Next Generation Sequencing consist on a combination of seeding techniques followed by local alignment based on dynamic programming approaches. Most seeding algorithms are based on backward search alignment, using the Burrows Wheeler Transform, the Ferragina and Manzini Index or Suffix Arrays. All these backward search algorithms have excellent performance, but their computational cost highly increases when allowing errors. In this paper, we discuss an inexact mapping algorithm based on pruning strategies for search tree exploration over genomic data.

RESULTS: The proposed algorithm achieves a 13x speed-up over similar algorithms when allowing 6 base errors, including insertions, deletions and mismatches. This algorithm can deal with 400 bps reads with up to 9 errors in a high quality Illumina dataset. In this example, the algorithm works as a preprocessor that reduces by 55\% the number of reads to be aligned. Depending on the aligner the overall execution time is reduced between 20-40\%.

CONCLUSIONS: Although not intended as a complete sequence mapping tool, the proposed algorithm could be used as a preprocessing step to modern sequence mappers. This step significantly reduces the number reads to be aligned, accelerating overall alignment time. Furthermore, this algorithm could be used for accelerating the seeding step of already available sequence mappers. In addition, an out-of-core index has been implemented for working with large genomes on systems without expensive memory configurations.

},
  keywords = {Algorithms, Genome, Human, Genomics, High-Throughput Nucleotide Sequencing, Humans, Sequence Alignment, Sequence Analysis, DNA, Software},
  issn = {1471-2105},
  doi = {10.1186/s12859-014-0438-3},
  author = {Salavert, Jos{\'e} and Tom{\'a}s, Andr{\'e}s and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Dopazo, Joaquin and Blanquer, Ignacio},
}

% Entry 1179: epistasis in thyroid carcinoma GWAS (BMC Medical Genomics, 2015).
% The exported month field only repeated the year and is omitted.
@article{1179,
  title = {Identification of epistatic interactions through genome-wide association studies in sporadic medullary and juvenile papillary thyroid carcinomas},
  journal = {BMC Medical Genomics},
  volume = {8},
  year = {2015},
  pages = {83},
  abstract = {BACKGROUND: The molecular mechanisms leading to sporadic medullary thyroid carcinoma (sMTC) and juvenile papillary thyroid carcinoma (PTC), two rare tumours of the thyroid gland, remain poorly understood. Genetic studies on thyroid carcinomas have been conducted, although just a few loci have been systematically associated. Given the difficulties to obtain single-loci associations, this work expands its scope to the study of epistatic interactions that could help to understand the genetic architecture of complex diseases and explain new heritable components of genetic risk. METHODS: We carried out the first screening for epistasis by Multifactor-Dimensionality Reduction (MDR) in genome-wide association study (GWAS) on sMTC and juvenile PTC, to identify the potential simultaneous involvement of pairs of variants in the disease. RESULTS: We have identified two significant epistatic gene interactions in sMTC (CHFR-AC016582.2 and C8orf37-RNU1-55P) and three in juvenile PTC (RP11-648k4.2-DIO1, RP11-648k4.2-DMGDH and RP11-648k4.2-LOXL1). Interestingly, each interacting gene pair included a non-coding RNA, providing thus support to the relevance that these elements are increasingly gaining to explain carcinoma development and progression.
CONCLUSIONS: Overall, this study contributes to the understanding of the genetic basis of thyroid carcinoma susceptibility in two different case scenarios such as sMTC and juvenile PTC.},
  keywords = {epistasis, GWAS, Thyroid cancer},
  issn = {1755-8794},
  doi = {10.1186/s12920-015-0160-7},
  url = {http://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-015-0160-7},
  author = {Luz{\'o}n-Toro, Berta and Bleda, Marta and Navarro, Elena and Garc{\'\i}a-Alonso, Luz and Ruiz-Ferrer, Macarena and Medina, Ignacio and Mart{\'\i}n-S{\'a}nchez, Marta and Gonzalez, Cristina Y and Fern{\'a}ndez, Raquel M and Torroglosa, Ana and Anti{\~n}olo, Guillermo and Dopazo, Joaquin and Borrego, Salud},
}

% Entry 562: same article as entry 1179 (identical DOI); both keys are kept so that
% citations of either key keep resolving. The author list continues on the next line.
@article{562,
  title = {Identification of epistatic interactions through genome-wide association studies in sporadic medullary and juvenile papillary thyroid carcinomas},
  journal = {BMC Medical Genomics},
  volume = {8},
  year = {2015},
  month = dec,
  pages = {83},
  abstract = {The molecular mechanisms leading to sporadic medullary thyroid carcinoma (sMTC) and juvenile papillary thyroid carcinoma (PTC), two rare tumours of the thyroid gland, remain poorly understood. Genetic studies on thyroid carcinomas have been conducted, although just a few loci have been systematically associated. Given the difficulties to obtain single-loci associations, this work expands its scope to the study of epistatic interactions that could help to understand the genetic architecture of complex diseases and explain new heritable components of genetic risk.},
  issn = {1755-8794},
  doi = {10.1186/s12920-015-0160-7},
  url = {https://doi.org/10.1186/s12920-015-0160-7},
  author = {Luz{\'o}n-Toro, Berta and Bleda, Marta and Navarro, Elena and Garc{\'\i}a-Alonso, Luz and Ruiz-Ferrer, Macarena and Medina, Ignacio and Mart{\'\i}n-S{\'a}nchez, Marta and Gonzalez, Cristina Y. and Fern{\'a}ndez, Raquel M.
and Torroglosa, Ana and Anti{\~n}olo, Guillermo and Dopazo, Joaquin and Borrego, Salud},
}

% Entry 1149: HPG-Methyl methylation analysis tool (Bioinformatics, 2015).
@article{1149,
  title = {A Parallel and Sensitive Software Tool for Methylation Analysis on Multicore Platforms},
  journal = {Bioinformatics},
  volume = {31},
  number = {19},
  year = {2015},
  month = jun,
  day = {10},
  pages = {3130--3138},
  abstract = {MOTIVATION: DNA methylation analysis suffers from very long processing time, since the advent of Next-Generation Sequencers (NGS) has shifted the bottleneck of genomic studies from the sequencers that obtain the DNA samples to the software that performs the analysis of these samples. The existing software for methylation analysis does not seem to scale efficiently neither with the size of the dataset nor with the length of the reads to be analyzed. Since it is expected that the sequencers will provide longer and longer reads in the near future, efficient and scalable methylation software should be developed. RESULTS: We present a new software tool, called HPG-Methyl, which efficiently maps bisulfite sequencing reads on DNA, analyzing DNA methylation. The strategy used by this software consists of leveraging the speed of the Burrows-Wheeler Transform to map a large number of DNA fragments (reads) rapidly, as well as the accuracy of the Smith-Waterman algorithm, which is exclusively employed to deal with the most ambiguous and shortest reads. Experimental results on platforms with Intel multicore processors show that HPGMethyl significantly outperforms in both execution time and sensitivity state-of-the-art software such as Bismark, BS-Seeker or BSMAP, particularly for long bisulfite reads. AVAILABILITY: Software in the form of C libraries and functions, together with instructions to compile and execute this software. Available by sftp to anonymous@clariano.uv.es (password "anonymous").
CONTACT: Juan.Orduna@uv.es.},
  keywords = {BS-seq, HPC, methylation, NGS},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btv357},
  url = {http://bioinformatics.oxfordjournals.org/content/31/19/3130.long},
  author = {T{\'a}rraga, Joaqu{\'\i}n and P{\'e}rez, Mariano and Ordu{\~n}a, Juan M and Duato, Jos{\'e} and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
}

% Entry 1076: HPG Aligner suffix-array read mapper (Bioinformatics, 2014).
% Keywords are comma-separated; the keyword list continues on the next line.
@article{1076,
  title = {Acceleration of short and long {DNA} read mapping without loss of accuracy using suffix array},
  journal = {Bioinformatics},
  volume = {30},
  year = {2014},
  month = aug,
  day = {20},
  pages = {3396--3398},
  abstract = {HPG Aligner applies suffix arrays for DNA read mapping. This implementation produces a highly sensitive and extremely fast mapping of DNA reads that scales up almost linearly with read length. The approach presented here is faster (over 20x for long reads) and more sensitive (over 98\% in a wide range of read lengths) than the current, state-of-the-art mappers. HPG Aligner is not only an optimal alternative for current sequencers but also the only solution available to cope with longer reads and growing throughputs produced by forthcoming sequencing technologies.},
  keywords = {NGS, short read mapping, HPC,
suffix arrays},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btu553},
  url = {http://bioinformatics.oxfordjournals.org/content/early/2014/08/19/bioinformatics.btu553.long},
  author = {T{\'a}rraga, Joaqu{\'\i}n and Arnau, Vicente and Martinez, Hector and Moreno, Raul and Cazorla, Diego and Salavert-Torres, Jos{\'e} and Blanquer-Espert, Ignacio and Dopazo, Joaqu{\'\i}n and Medina, Ignacio},
}

% Entry 1058: TEAM gene-panel web tool (Nucleic Acids Research, 2014).
@article{1058,
  title = {A web tool for the design and management of panels of genes for targeted enrichment and massive sequencing for clinical applications},
  journal = {Nucleic Acids Research},
  volume = {42},
  year = {2014},
  month = may,
  day = {26},
  pages = {W83--W87},
  abstract = {Disease targeted sequencing is gaining importance as a powerful and cost-effective application of high throughput sequencing technologies to the diagnosis. However, the lack of proper tools to process the data hinders its extensive adoption. Here we present TEAM, an intuitive and easy-to-use web tool that fills the gap between the predicted mutations and the final diagnostic in targeted enrichment sequencing analysis. The tool searches for known diagnostic mutations, corresponding to a disease panel, among the predicted patient{\textquoteright}s variants. Diagnostic variants for the disease are taken from four databases of disease-related variants (HGMD-public, HUMSAVAR, ClinVar and COSMIC.) If no primary diagnostic variant is found, then a list of secondary findings that can help to establish a diagnostic is produced. TEAM also provides with an interface for the definition of and customization of panels, by means of which, genes and mutations can be added or discarded to adjust panel definitions.
TEAM is freely available at: http://team.babelomics.org.},
  keywords = {Diagnostic, Targeted enrichment sequencing, WES},
  issn = {1362-4962},
  doi = {10.1093/nar/gku472},
  url = {http://nar.oxfordjournals.org/cgi/pmidlookup?view=long&pmid=24861626},
  author = {Alem{\'a}n, Alejandro and Garcia-Garcia, Francisco and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
}

% Entry 1051: BiERapp variant prioritization tool (Nucleic Acids Research, 2014).
% Keywords are comma-separated; the keyword list continues on the next line.
@article{1051,
  title = {A web-based interactive framework to assist in the prioritization of disease candidate genes in whole-exome sequencing studies},
  journal = {Nucleic Acids Research},
  volume = {42},
  year = {2014},
  month = may,
  day = {6},
  pages = {W88--W93},
  abstract = {Whole-exome sequencing has become a fundamental tool for the discovery of disease-related genes of familial diseases and the identification of somatic driver variants in cancer. However, finding the causal mutation among the enormous background of individual variability in a small number of samples is still a big challenge. Here we describe a web-based tool, BiERapp, which efficiently helps in the identification of causative variants in family and sporadic genetic diseases. The program reads lists of predicted variants (nucleotide substitutions and indels) in affected individuals or tumor samples and controls. In family studies, different modes of inheritance can easily be defined to filter out variants that do not segregate with the disease along the family. Moreover, BiERapp integrates additional information such as allelic frequencies in the general population and the most popular damaging scores to further narrow down the number of putative variants in successive filtering steps. BiERapp provides an interactive and user-friendly interface that implements the filtering strategy used in the context of a large-scale genomic project carried out by the Spanish Network for Research in Rare Diseases (CIBERER) in which more than 800 exomes have been analyzed. BiERapp is freely available at: http://bierapp.babelomics.org/},
  keywords = {NGS,
prioritization},
  issn = {1362-4962},
  doi = {10.1093/nar/gku407},
  url = {http://nar.oxfordjournals.org/content/42/W1/W88},
  author = {Alem{\'a}n, Alejandro and Garcia-Garcia, Francisco and Salavert, Francisco and Medina, Ignacio and Dopazo, Joaqu{\'\i}n},
}

% Entry 1003: Genome Maps genome browser (Nucleic Acids Research, 2013).
@article{1003,
  title = {{Genome Maps}, a new generation genome browser},
  journal = {Nucleic Acids Research},
  volume = {41},
  number = {W1},
  year = {2013},
  month = jun,
  day = {8},
  pages = {W41--W46},
  abstract = {Genome browsers have gained importance as more genomes and related genomic information become available. However, the increase of information brought about by new generation sequencing technologies is, at the same time, causing a subtle but continuous decrease in the efficiency of conventional genome browsers. Here, we present Genome Maps, a genome browser that implements an innovative model of data transfer and management. The program uses highly efficient technologies from the new HTML5 standard, such as scalable vector graphics, that optimize workloads at both server and client sides and ensure future scalability. Thus, data management and representation are entirely carried out by the browser, without the need of any Java Applet, Flash or other plug-in technology installation. Relevant biological data on genes, transcripts, exons, regulatory features, single-nucleotide polymorphisms, karyotype and so forth, are imported from web services and are available as tracks. In addition, several DAS servers are already included in Genome Maps. As a novelty, this web-based genome browser allows the local upload of huge genomic data files (e.g. VCF or BAM) that can be dynamically visualized in real time at the client side, thus facilitating the management of medical data affected by privacy restrictions. Finally, Genome Maps can easily be integrated in any web application by including only a few lines of code.
Genome Maps is an open source collaborative initiative available in the GitHub repository (https://github.com/compbio-bigdata-viz/genome-maps). Genome Maps is available at: http://www.genomemaps.org.},
  keywords = {BAM, genome viewer, HTML5, javascript, Next Generation Sequencing, NGS, SVG, VCF},
  issn = {1362-4962},
  doi = {10.1093/nar/gkt530},
  url = {http://nar.oxfordjournals.org/content/41/W1/W41},
  author = {Medina, Ignacio and Salavert, Francisco and S{\'a}nchez, Rub{\'e}n and De Maria, Alejandro and Alonso, Roberto and Escobar, Pablo and Bleda, Marta and Dopazo, Joaqu{\'\i}n},
}

% Entry 500: PATHiWAYS signaling pathway tool (Nucleic Acids Research, 2013).
% The abbreviated page range "W213-7" is written out in full.
@article{500,
  title = {Inferring the functional effect of gene expression changes in signaling pathways},
  journal = {Nucleic Acids Research},
  volume = {41},
  year = {2013},
  month = jul,
  pages = {W213--W217},
  abstract = {

Signaling pathways constitute a valuable source of information that allows interpreting the way in which alterations in gene activities affect to particular cell functionalities. There are web tools available that allow viewing and editing pathways, as well as representing experimental data on them. However, few methods aimed to identify the signaling circuits, within a pathway, associated to the biological problem studied exist and none of them provide a convenient graphical web interface. We present PATHiWAYS, a web-based signaling pathway visualization system that infers changes in signaling that affect cell functionality from the measurements of gene expression values in typical expression microarray case-control experiments. A simple probabilistic model of the pathway is used to estimate the probabilities for signal transmission from any receptor to any final effector molecule (taking into account the pathway topology) using for this the individual probabilities of gene product presence/absence inferred from gene expression values. Significant changes in these probabilities allow linking different cell functionalities triggered by the pathway to the biological problem studied. PATHiWAYS is available at: http://pathiways.babelomics.org/.

}, keywords = {Animals, Humans, Internet, Mice, Models, Statistical, Receptors, Cell Surface, Signal Transduction, Software, Transcriptome}, issn = {1362-4962}, doi = {10.1093/nar/gkt451}, author = {Sebasti{\'a}n-Leon, Patricia and Carbonell, Jos{\'e} and Salavert, Francisco and S{\'a}nchez, Rub{\'e}n and Medina, Ignacio and Dopazo, Joaquin} } @article {916, title = {CellBase, a comprehensive collection of RESTful web services for retrieving relevant biological information from heterogeneous sources.}, journal = {Nucleic acids research}, volume = {40}, year = {2012}, month = {2012 Jul}, pages = {W609-14}, abstract = {During the past years, the advances in high-throughput technologies have produced an unprecedented growth in the number and size of repositories and databases storing relevant biological data. Today, there is more biological information than ever but, unfortunately, the current status of many of these repositories is far from being optimal. Some of the most common problems are that the information is spread out in many small databases; frequently there are different standards among repositories and some databases are no longer supported or they contain too specific and unconnected information. In addition, data size is increasingly becoming an obstacle when accessing or storing biological data. All these issues make very difficult to extract and integrate information from different sources, to analyze experiments or to access and query this information in a programmatic way. CellBase provides a solution to the growing necessity of integration by easing the access to biological data. CellBase implements a set of RESTful web services that query a centralized database containing the most relevant biological data sources. The database is hosted in our servers and is regularly updated. 
CellBase documentation can be found at http://docs.bioinfo.cipf.es/projects/cellbase.}, issn = {1362-4962}, doi = {10.1093/nar/gks575}, url = {http://nar.oxfordjournals.org/content/40/W1/W609.long}, author = {Bleda, Marta and T{\'a}rraga, Joaqu{\'\i}n and De Maria, Alejandro and Salavert, Francisco and Garc{\'\i}a-Alonso, Luz and Celma, Matilde and Martin, Ainoha and Dopazo, Joaquin and Medina, Ignacio} } @article {512, title = {Discovering the hidden sub-network component in a ranked list of genes or proteins derived from genomic experiments.}, journal = {Nucleic Acids Res}, volume = {40}, year = {2012}, month = {2012 Nov 01}, pages = {e158}, abstract = {

Genomic experiments (e.g. differential gene expression, single-nucleotide polymorphism association) typically produce ranked list of genes. We present a simple but powerful approach which uses protein-protein interaction data to detect sub-networks within such ranked lists of genes or proteins. We performed an exhaustive study of network parameters that allowed us concluding that the average number of components and the average number of nodes per component are the parameters that best discriminate between real and random networks. A novel aspect that increases the efficiency of this strategy in finding sub-networks is that, in addition to direct connections, also connections mediated by intermediate nodes are considered to build up the sub-networks. The possibility of using of such intermediate nodes makes this approach more robust to noise. It also overcomes some limitations intrinsic to experimental designs based on differential expression, in which some nodes are invariant across conditions. The proposed approach can also be used for candidate disease-gene prioritization. Here, we demonstrate the usefulness of the approach by means of several case examples that include a differential expression analysis in Fanconi Anemia, a genome-wide association study of bipolar disorder and a genome-scale study of essentiality in cancer genes. An efficient and easy-to-use web interface (available at http://www.babelomics.org) based on HTML5 technologies is also provided to run the algorithm and represent the network.

}, keywords = {Bipolar Disorder, Fanconi Anemia, Gene Regulatory Networks, Genes, Neoplasm, Genome-Wide Association Study, Genomics, Humans, Protein Interaction Mapping}, issn = {1362-4962}, doi = {10.1093/nar/gks699}, author = {Garc{\'\i}a-Alonso, Luz and Alonso, Roberto and Vidal, Enrique and Amadoz, Alicia and De Maria, Alejandro and Minguez, Pablo and Medina, Ignacio and Dopazo, Joaquin} } @article {944, title = {Four new loci associations discovered by pathway-based and network analyses of the genome-wide variability profile of Hirschsprung{\textquoteright}s disease.}, journal = {Orphanet journal of rare diseases}, volume = {7}, year = {2012}, month = {2012 Dec 28}, pages = {103}, abstract = {ABSTRACT: Finding gene associations in rare diseases is frequently hampered by the reduced numbers of patients accessible. Conventional gene-based association tests rely on the availability of large cohorts, which constitutes a serious limitation for its application in this scenario. To overcome this problem we have used here a combined strategy in which a pathway-based analysis (PBA) has been initially conducted to prioritize candidate genes in a Spanish cohort of 53 trios of short-segment Hirschsprung{\textquoteright}s disease. Candidate genes have been further validated in an independent population of 106 trios. The study revealed a strong association of 11 gene ontology (GO) modules related to signal transduction and its regulation, enteric nervous system (ENS) formation and other HSCR-related processes. Among the preselected candidates, a total of 4 loci, RASGEF1A, IQGAP2, DLC1 and CHRNA7, related to signal transduction and migration processes, were found to be significantly associated to HSCR. Network analysis also confirms their involvement in the network of already known disease genes. 
This approach, based on the study of functionally-related gene sets, requires of lower sample sizes and opens new opportunities for the study of rare diseases.}, issn = {1750-1172}, doi = {10.1186/1750-1172-7-103}, url = {http://www.ojrd.com/content/7/1/103/abstract}, author = {Fern{\'a}ndez, Raquel Ma and Bleda, Marta and N{\'u}{\~n}ez-Torres, Roc{\'\i}o and Medina, Ignacio and Luz{\'o}n-Toro, Berta and Garc{\'\i}a-Alonso, Luz and Torroglosa, Ana and Marb{\`a}, Martina and Enguix-Riego, Ma Valle and Montaner, David and Anti{\~n}olo, Guillermo and Dopazo, Joaqu{\'\i}n and Borrego, Salud} } @article {515, title = {Four new loci associations discovered by pathway-based and network analyses of the genome-wide variability profile of Hirschsprung{\textquoteright}s disease.}, journal = {Orphanet J Rare Dis}, volume = {7}, year = {2012}, month = {2012 Dec 28}, pages = {103}, abstract = {

Finding gene associations in rare diseases is frequently hampered by the reduced numbers of patients accessible. Conventional gene-based association tests rely on the availability of large cohorts, which constitutes a serious limitation for its application in this scenario. To overcome this problem we have used here a combined strategy in which a pathway-based analysis (PBA) has been initially conducted to prioritize candidate genes in a Spanish cohort of 53 trios of short-segment Hirschsprung{\textquoteright}s disease. Candidate genes have been further validated in an independent population of 106 trios. The study revealed a strong association of 11 gene ontology (GO) modules related to signal transduction and its regulation, enteric nervous system (ENS) formation and other HSCR-related processes. Among the preselected candidates, a total of 4 loci, RASGEF1A, IQGAP2, DLC1 and CHRNA7, related to signal transduction and migration processes, were found to be significantly associated to HSCR. Network analysis also confirms their involvement in the network of already known disease genes. This approach, based on the study of functionally-related gene sets, requires of lower sample sizes and opens new opportunities for the study of rare diseases.

}, keywords = {Female, Genetic Predisposition to Disease, Genome-Wide Association Study, Genotype, Hirschsprung Disease, Humans, Male}, issn = {1750-1172}, doi = {10.1186/1750-1172-7-103}, author = {Fern{\'a}ndez, Raquel Ma and Bleda, Marta and N{\'u}{\~n}ez-Torres, Roc{\'\i}o and Medina, Ignacio and Luz{\'o}n-Toro, Berta and Garc{\'\i}a-Alonso, Luz and Torroglosa, Ana and Marb{\`a}, Martina and Enguix-Riego, Ma Valle and Montaner, David and Anti{\~n}olo, Guillermo and Dopazo, Joaquin and Borrego, Salud} } @article {517, title = {Inferring the regulatory network behind a gene expression experiment.}, journal = {Nucleic Acids Res}, volume = {40}, year = {2012}, month = {2012 Jul}, pages = {W168-72}, abstract = {

Transcription factors (TFs) and miRNAs are the most important dynamic regulators in the control of gene expression in multicellular organisms. These regulatory elements play crucial roles in development, cell cycling and cell signaling, and they have also been associated with many diseases. The Regulatory Network Analysis Tool (RENATO) web server makes the exploration of regulatory networks easy, enabling a better understanding of functional modularity and network integrity under specific perturbations. RENATO is suitable for the analysis of the result of expression profiling experiments. The program analyses lists of genes and search for the regulators compatible with its activation or deactivation. Tests of single enrichment or gene set enrichment allow the selection of the subset of TFs or miRNAs significantly involved in the regulation of the query genes. RENATO also offers an interactive advanced graphical interface that allows exploring the regulatory network found. RENATO is available at: http://renato.bioinfo.cipf.es/.

}, keywords = {Binding Sites, Databases, Genetic, Fanconi Anemia, Gene Regulatory Networks, Internet, MicroRNAs, Software, Transcription Factors, Transcriptome}, issn = {1362-4962}, doi = {10.1093/nar/gks573}, author = {Bleda, Marta and Medina, Ignacio and Alonso, Roberto and De Maria, Alejandro and Salavert, Francisco and Dopazo, Joaquin} } @article {920, title = {A map of human microRNA variation uncovers unexpectedly high levels of variability.}, journal = {Genome medicine}, volume = {4}, year = {2012}, month = {2012 Aug 20}, pages = {62}, abstract = {ABSTRACT: BACKGROUND: MicroRNAs (miRNAs) are key components of the gene regulatory network in many species. During the past few years, these regulatory elements have been shown to be involved in an increasing number and range of diseases. Consequently, the compilation of a comprehensive map of natural variability in healthy population seems an obvious requirement for future research on miRNA-related pathologies. METHODS: Data on 14 populations from the 1000 Genomes Project were analysed, along with new data extracted from 60 exomes of healthy individuals from a southern Spain population, sequenced in the context of the Medical Genome Project, to derive an accurate map of miRNA variability. RESULTS: Despite the common belief that miRNAs are highly conserved elements, analysis of the sequences of the 1,152 individuals indicated that the observed level of variability is double what was expected. A total of 527 variants were found. Among these, 45 variants affected the recognition region of the corresponding miRNA and were found in 43 different miRNAs, 26 of which are known to be involved in 57 diseases. Different parts of the mature structure of the miRNA were affected to different degrees by variants, which suggests the existence of a selective pressure related to the relative functional impact of the change. 
Moreover, 41 variants showed a significant deviation from the Hardy-Weinberg equilibrium, which supports the existence of a selective process against some alleles. The average number of variants per individual in miRNAs was 28. CONCLUSIONS: Despite an expectation that miRNAs would be highly conserved genomic elements, our study reports a level of variability comparable to that observed for coding genes.}, keywords = {NGS}, issn = {1756-994X}, doi = {10.1186/gm363}, url = {http://genomemedicine.com/content/4/8/62/abstract}, author = {Carbonell, Jos{\'e} and Alloza, Eva and Arce, Pablo and Borrego, Salud and Santoyo, Javier and Ruiz-Ferrer, Macarena and Medina, Ignacio and Jim{\'e}nez-Almaz{\'a}n, Jorge and M{\'e}ndez-Vidal, Cristina and Gonz{\'a}lez-del Pozo, Mar{\'\i}a and Vela, Alicia and Bhattacharya, Shomi S and Anti{\~n}olo, Guillermo and Dopazo, Joaquin} } @article {939, title = {Select your SNPs (SYSNPs): a web tool for automatic and massive selection of SNPs.}, journal = {International journal of data mining and bioinformatics}, volume = {6}, year = {2012}, month = {2012}, pages = {324-34}, abstract = {Association studies are the choice approach in the discovery of the genomic basis of complex traits. To carry out such analysis, researchers frequently need to (1) select optimally informative sets of Single Nucleotide Polymorphisms (SNPs) in candidate regions and (2) annotate the results of associations found by means of genome-wide SNP arrays. These are complex tasks, since many criteria have to be considered, including the SNPs{\textquoteright} functional properties, technological information and haplotype frequencies in given populations. SYSNPs implements algorithms that allow for efficient and simultaneous consideration of all the relevant criteria to obtain sets of SNPs that properly cover arbitrarily large lists of genes or genomic regions. Complementarily, SYSNPs allows for comprehensive functional annotation of SNPs linked to any given marker SNP. 
SYSNPs dramatically reduces the effort needed for SNP selection from days of searching various databases to a few minutes using a simple browser.}, issn = {1748-5673}, url = {http://inderscience.metapress.com/content/f76740x8071u513n/}, author = {Lorente-Galdos, Bel{\'e}n and Medina, Ignacio and Morcillo-Suarez, Carlos and Heredia, Txema and Carre{\~n}o-Torres, Angel and Sangr{\'o}s, Ricardo and Alegre, Josep and Pita, Guillermo and Vellalta, Gemma and Malats, Nuria and Pisano, David G and Joaqu{\'\i}n Dopazo and Navarro, Arcadi} } @article {900, title = {Using GPUs for the Exact Alignment of Short-read Genetic Sequences by Means of the Burrows{\textendash}Wheeler Transform.}, journal = {IEEE/ACM transactions on computational biology and bioinformatics / IEEE, ACM}, volume = {9}, number = {4}, year = {2012}, month = {2012 Mar 20}, pages = {1245-1256}, abstract = {General Purpose Graphic Processing Units (GPGPUs) constitute an inexpensive resource for computing-intensive applications that could exploit an intrinsic fine-grain parallelism. This paper presents the design and implementation in GPGPUs of an exact alignment tool for nucleotide sequences based on the Burrows-Wheeler Transform. We compare this algorithm with state-of-the-art implementations of the same algorithm over standard CPUs, and considering the same conditions in terms of I/O. Excluding disk transfers, the implementation of the algorithm in GPUs shows a speedup larger than 12x, when compared to CPU execution. This implementation exploits the parallelism by concurrently searching different sequences on the same reference search tree, maximising memory locality and ensuring a symmetric access to the data. 
The article describes the behaviour of the algorithm in GPU, showing a good scalability in the performance, only limited by the size of the GPU inner memory.}, keywords = {Burrows-Wheeler transform, CPU execution, GPGPU, NGS}, issn = {1557-9964}, doi = {10.1109/TCBB.2012.49}, url = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?reload=true\&arnumber=6175888}, author = {Salavert Torres, Jose and Blanquer Espert, Ignacio and Tomas Dominguez, Andres and Hern{\'a}ndez, Vicente and Medina, Ignacio and T{\'a}rraga, Joaqu{\'\i}n and Dopazo, Joaquin} } @article {523, title = {VARIANT: Command Line, Web service and Web interface for fast and accurate functional characterization of variants found by Next-Generation Sequencing.}, journal = {Nucleic Acids Res}, volume = {40}, year = {2012}, month = {2012 Jul}, pages = {W54-8}, abstract = {

The massive use of Next-Generation Sequencing (NGS) technologies is uncovering an unexpected amount of variability. The functional characterization of such variability, particularly in the most common form of variation found, the Single Nucleotide Variants (SNVs), has become a priority that needs to be addressed in a systematic way. VARIANT (VARIant ANalyis Tool) reports information on the variants found that include consequence type and annotations taken from different databases and repositories (SNPs and variants from dbSNP and 1000 genomes, and disease-related variants from the Genome-Wide Association Study (GWAS) catalog, Online Mendelian Inheritance in Man (OMIM), Catalog of Somatic Mutations in Cancer (COSMIC) mutations, etc). VARIANT also produces a rich variety of annotations that include information on the regulatory (transcription factor or miRNA-binding sites, etc.) or structural roles, or on the selective pressures on the sites affected by the variation. This information allows extending the conventional reports beyond the coding regions and expands the knowledge on the contribution of non-coding or synonymous variants to the phenotype studied. Contrarily to other tools, VARIANT uses a remote database and operates through efficient RESTful Web Services that optimize search and transaction operations. In this way, local problems of installation, update or disk size limitations are overcome without the need of sacrifice speed (thousands of variants are processed per minute). VARIANT is available at: http://variant.bioinfo.cipf.es.

}, keywords = {Databases, Nucleic Acid, Genetic Variation, High-Throughput Nucleotide Sequencing, Internet, Molecular Sequence Annotation, mutation, Polymorphism, Single Nucleotide, Software, User-Computer Interface}, issn = {1362-4962}, doi = {10.1093/nar/gks572}, author = {Medina, Ignacio and De Maria, Alejandro and Bleda, Marta and Salavert, Francisco and Alonso, Roberto and Gonzalez, Cristina Y and Dopazo, Joaquin} } @article {524, title = {Whole-genome bisulfite DNA sequencing of a DNMT3B mutant patient.}, journal = {Epigenetics}, volume = {7}, year = {2012}, month = {2012 Jun 01}, pages = {542-50}, abstract = {

The immunodeficiency, centromere instability and facial anomalies (ICF) syndrome is associated to mutations of the DNA methyl-transferase DNMT3B, resulting in a reduction of enzyme activity. Aberrant expression of immune system genes and hypomethylation of pericentromeric regions accompanied by chromosomal instability were determined as alterations driving the disease phenotype. However, so far only technologies capable to analyze single loci were applied to determine epigenetic alterations in ICF patients. In the current study, we performed whole-genome bisulphite sequencing to assess alteration in DNA methylation at base pair resolution. Genome-wide we detected a decrease of methylation level of 42\%, with the most profound changes occurring in inactive heterochromatic regions, satellite repeats and transposons. Interestingly, transcriptional active loci and ribosomal RNA repeats escaped global hypomethylation. Despite a genome-wide loss of DNA methylation the epigenetic landscape and crucial regulatory structures were conserved. Remarkably, we revealed a mislocated activity of mutant DNMT3B to H3K4me1 loci resulting in hypermethylation of active promoters. Functionally, we could associate alterations in promoter methylation with the ICF syndrome immunodeficient phenotype by detecting changes in genes related to the B-cell receptor mediated maturation pathway.

}, keywords = {B-Lymphocytes, Cell Line, Transformed, Child, Preschool, DNA (Cytosine-5-)-Methyltransferases, DNA Methylation, Epigenesis, Genetic, Face, Female, Genome, Human, High-Throughput Nucleotide Sequencing, Humans, Immunologic Deficiency Syndromes, mutation, Primary Immunodeficiency Diseases, Sequence Analysis, DNA, Sulfites}, issn = {1559-2308}, doi = {10.4161/epi.20523}, author = {Heyn, Holger and Vidal, Enrique and Sayols, Sergi and Sanchez-Mut, Jose V and Moran, Sebastian and Medina, Ignacio and Sandoval, Juan and Sim{\'o}-Riudalbas, Laia and Szczesna, Karolina and Huertas, Dori and Gatto, Sole and Matarazzo, Maria R and Dopazo, Joaquin and Esteller, Manel} } @article {539, title = {Phylemon 2.0: a suite of web-tools for molecular evolution, phylogenetics, phylogenomics and hypotheses testing.}, journal = {Nucleic Acids Res}, volume = {39}, year = {2011}, month = {2011 Jul}, pages = {W470-4}, abstract = {

Phylemon 2.0 is a new release of the suite of web tools for molecular evolution, phylogenetics, phylogenomics and hypotheses testing. It has been designed as a response to the increasing demand of molecular sequence analyses for experts and non-expert users. Phylemon 2.0 has several unique features that differentiates it from other similar web resources: (i) it offers an integrated environment that enables evolutionary analyses, format conversion, file storage and edition of results; (ii) it suggests further analyses, thereby guiding the users through the web server; and (iii) it allows users to design and save phylogenetic pipelines to be used over multiple genes (phylogenomics). Altogether, Phylemon 2.0 integrates a suite of 30 tools covering sequence alignment reconstruction and trimming; tree reconstruction, visualization and manipulation; and evolutionary hypotheses testing.

}, keywords = {Evolution, Molecular, Genomics, Internet, Phylogeny, Sequence Alignment, Software}, issn = {1362-4962}, doi = {10.1093/nar/gkr408}, author = {S{\'a}nchez, Rub{\'e}n and Serra, Fran{\c c}ois and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Carbonell, Jos{\'e} and Pulido, Luis and De Maria, Alejandro and Capella-Guti{\'e}rrez, Salvador and Huerta-Cepas, Jaime and Gabald{\'o}n, Toni and Dopazo, Joaquin and Dopazo, Hern{\'a}n} } @article {20478823, title = {Babelomics: an integrative platform for the analysis of transcriptomics, proteomics and genomic data with advanced functional profiling.}, journal = {Nucleic Acids Research}, volume = {38}, year = {2010}, month = {2010 May 16}, pages = {W210-W213}, chapter = {Featured in NAR}, abstract = {

Babelomics is a response to the growing necessity of integrating and analyzing different types of genomic data in an environment that allows an easy functional interpretation of the results. Babelomics includes a complete suite of methods for the analysis of gene expression data that include normalization (covering most commercial platforms), pre-processing, differential gene expression (case-controls, multiclass, survival or continuous values), predictors, clustering; large-scale genotyping assays (case controls and TDTs, and allows population stratification analysis and correction). All these genomic data analysis facilities are integrated and connected to multiple options for the functional interpretation of the experiments. Different methods of functional enrichment or gene set enrichment can be used to understand the functional basis of the experiment analyzed. Many sources of biological information, which include functional (GO, KEGG, Biocarta, Reactome, etc.), regulatory (Transfac, Jaspar, ORegAnno, miRNAs, etc.), text-mining or protein-protein interaction modules can be used for this purpose. Finally a tool for the de novo functional annotation of sequences has been included in the system. This provides support for the functional analysis of non-model species. Mirrors of Babelomics or command line execution of their individual components are now possible. Babelomics is available at http://www.babelomics.org.

}, keywords = {babelomics, gene expression, genotyping, gepas, GSA, GWAS}, url = {http://nar.oxfordjournals.org/content/38/suppl_2/W210.full}, author = {Medina, Ignacio and Carbonell, Jos{\'e} and Pulido, Luis and Madeira, Sara C and Goetz, Stefan and Ana Conesa and T{\'a}rraga, Joaqu{\'\i}n and Pascual-Montano, Alberto and Nogales-Cadenas, Ruben and Santoyo, Javier and Garc{\'\i}a, Francisco and Marb{\`a}, Martina and Montaner, David and Joaqu{\'\i}n Dopazo} } @article {20361057, title = {Initial genomics of the human nucleolus.}, journal = {PLoS genetics}, volume = {6}, year = {2010}, month = {2010}, pages = {e1000889}, abstract = {

We report for the first time the genomics of a nuclear compartment of the eukaryotic cell. 454 sequencing and microarray analysis revealed the pattern of nucleolus-associated chromatin domains (NADs) in the linear human genome and identified different gene families and certain satellite repeats as the major building blocks of NADs, which constitute about 4\% of the genome. Bioinformatic evaluation showed that NAD-localized genes take part in specific biological processes, like the response to other organisms, odor perception, and tissue development. 3D FISH and immunofluorescence experiments illustrated the spatial distribution of NAD-specific chromatin within interphase nuclei and its alteration upon transcriptional changes. Altogether, our findings describe the nature of DNA sequences associated with the human nucleolus and provide insights into the function of the nucleolus in genome organization and establishment of nuclear architecture.

}, keywords = {NGS, nucleolus}, doi = {10.1371/journal.pgen.1000889}, url = {http://www.plosgenetics.org/article/info\%3Adoi\%2F10.1371\%2Fjournal.pgen.1000889}, author = {N{\'e}meth, Attila and Ana Conesa and Santoyo-L{\'o}pez, Javier and Medina, Ignacio and Montaner, David and P{\'e}terfia, B{\'a}lint and Solovei, Irina and Cremer, Thomas and Dopazo, Joaquin and L{\"a}ngst, Gernot} } @article {20676074, title = {The MicroArray Quality Control (MAQC)-II study of common practices for the development and validation of microarray-based predictive models.}, journal = {Nature biotechnology}, volume = {28}, year = {2010}, month = {2010 Aug}, pages = {827-38}, abstract = {

Gene expression data from microarrays are being applied to predict preclinical and clinical endpoints, but the reliability of these predictions has not been established. In the MAQC-II project, 36 independent teams analyzed six microarray data sets to generate predictive models for classifying a sample with respect to one of 13 endpoints indicative of lung or liver toxicity in rodents, or of breast cancer, multiple myeloma or neuroblastoma in humans. In total, $>$30,000 models were built using many combinations of analytical methods. The teams generated predictive models without knowing the biological meaning of some of the endpoints and, to mimic clinical reality, tested the models on data that had not been used for training. We found that model performance depended largely on the endpoint and team proficiency and that different approaches generated models of similar performance. The conclusions and recommendations from MAQC-II should be useful for regulatory agencies, study committees and independent investigators that evaluate methods for global gene expression analysis.

}, url = {http://www.nature.com/nbt/journal/v28/n8/full/nbt.1665.html}, author = {Shi, Leming and Campbell, Gregory and Jones, Wendell D and Campagne, Fabien and Wen, Zhining and Walker, Stephen J and Su, Zhenqiang and Chu, Tzu-Ming and Goodsaid, Federico M and Pusztai, Lajos and Shaughnessy, John D and Oberthuer, Andr{\'e} and Thomas, Russell S and Paules, Richard S and Fielden, Mark and Barlogie, Bart and Chen, Weijie and Du, Pan and Fischer, Matthias and Furlanello, Cesare and Gallas, Brandon D and Ge, Xijin and Megherbi, Dalila B and Symmans, W Fraser and Wang, May D and Zhang, John and Bitter, Hans and Brors, Benedikt and Bushel, Pierre R and Bylesjo, Max and Chen, Minjun and Cheng, Jie and Cheng, Jing and Chou, Jeff and Davison, Timothy S and Delorenzi, Mauro and Deng, Youping and Devanarayan, Viswanath and Dix, David J and Dopazo, Joaquin and Dorff, Kevin C and Elloumi, Fathi and Fan, Jianqing and Fan, Shicai and Fan, Xiaohui and Fang, Hong and Gonzaludo, Nina and Hess, Kenneth R and Hong, Huixiao and Huan, Jun and Irizarry, Rafael A and Judson, Richard and Juraeva, Dilafruz and Lababidi, Samir and Lambert, Christophe G and Li, Li and Li, Yanen and Li, Zhen and Lin, Simon M and Liu, Guozhen and Lobenhofer, Edward K and Luo, Jun and Luo, Wen and McCall, Matthew N and Nikolsky, Yuri and Pennello, Gene A and Perkins, Roger G and Philip, Reena and Popovici, Vlad and Price, Nathan D and Qian, Feng and Scherer, Andreas and Shi, Tieliu and Shi, Weiwei and Sung, Jaeyun and Thierry-Mieg, Danielle and Thierry-Mieg, Jean and Thodima, Venkata and Trygg, Johan and Vishnuvajjala, Lakshmi and Wang, Sue Jane and Wu, Jianping and Wu, Yichao and Xie, Qian and Yousef, Waleed A and Zhang, Liang and Zhang, Xuegong and Zhong, Sheng and Zhou, Yiming and Zhu, Sheng and Arasappan, Dhivya and Bao, Wenjun and Lucas, Anne Bergstrom and Berthold, Frank and Brennan, Richard J and Buness, Andreas and Catalano, Jennifer G and Chang, Chang and Chen, Rong and Cheng, Yiyu and Cui, Jian and 
Czika, Wendy and Demichelis, Francesca and Deng, Xutao and Dosymbekov, Damir and Eils, Roland and Feng, Yang and Fostel, Jennifer and Fulmer-Smentek, Stephanie and Fuscoe, James C and Gatto, Laurent and Ge, Weigong and Goldstein, Darlene R and Guo, Li and Halbert, Donald N and Han, Jing and Harris, Stephen C and Hatzis, Christos and Herman, Damir and Huang, Jianping and Jensen, Roderick V and Jiang, Rui and Johnson, Charles D and Jurman, Giuseppe and Kahlert, Yvonne and Khuder, Sadik A and Kohl, Matthias and Li, Jianying and Li, Li and Li, Menglong and Li, Quan-Zhen and Li, Shao and Li, Zhiguang and Liu, Jie and Liu, Ying and Liu, Zhichao and Meng, Lu and Madera, Manuel and Martinez-Murillo, Francisco and Medina, Ignacio and Meehan, Joseph and Miclaus, Kelci and Moffitt, Richard A and Montaner, David and Mukherjee, Piali and Mulligan, George J and Neville, Padraic and Nikolskaya, Tatiana and Ning, Baitang and Page, Grier P and Parker, Joel and Parry, R Mitchell and Peng, Xuejun and Peterson, Ron L and Phan, John H and Quanz, Brian and Ren, Yi and Riccadonna, Samantha and Roter, Alan H and Samuelson, Frank W and Schumacher, Martin M and Shambaugh, Joseph D and Shi, Qiang and Shippy, Richard and Si, Shengzhu and Smalter, Aaron and Sotiriou, Christos and Soukup, Mat and Staedtler, Frank and Steiner, Guido and Stokes, Todd H and Sun, Qinglan and Tan, Pei-Yi and Tang, Rong and Tezak, Zivana and Thorn, Brett and Tsyganova, Marina and Turpaz, Yaron and Vega, Silvia C and Visintainer, Roberto and von Frese, Juergen and Wang, Charles and Wang, Eric and Wang, Junwei and Wang, Wei and Westermann, Frank and Willey, James C and Woods, Matthew and Wu, Shujian and Xiao, Nianqing and Xu, Joshua and Xu, Lei and Yang, Lun and Zeng, Xiao and Zhang, Jialu and Zhang, Li and Zhang, Min and Zhao, Chen and Puri, Raj K and Scherf, Uwe and Tong, Weida and Wolfinger, Russell D} } @article {576, title = {Serial Expression Analysis: a web tool for the analysis of serial gene expression data.}, 
journal = {Nucleic Acids Res}, volume = {38}, year = {2010}, month = {2010 Jul}, pages = {W239-45}, abstract = {

Serial transcriptomics experiments investigate the dynamics of gene expression changes associated with a quantitative variable such as time or dosage. The statistical analysis of these data implies the study of global and gene-specific expression trends, the identification of significant serial changes, the comparison of expression profiles and the assessment of transcriptional changes in terms of cellular processes. We have created the SEA (Serial Expression Analysis) suite to provide a complete web-based resource for the analysis of serial transcriptomics data. SEA offers five different algorithms based on univariate, multivariate and functional profiling strategies framed within a user-friendly interface and a project-oriented architecture to facilitate the analysis of serial gene expression data sets from different perspectives. SEA is available at sea.bioinfo.cipf.es.

}, keywords = {Algorithms, Gene Expression Profiling, Internet, Kinetics, Linear Models, Oligonucleotide Array Sequence Analysis, Software}, issn = {1362-4962}, doi = {10.1093/nar/gkq488}, author = {Nueda, Maria Jos{\'e} and Carbonell, Jos{\'e} and Medina, Ignacio and Dopazo, Joaquin and Conesa, Ana} } @article {IgnacioMedina07012009, title = {Gene set-based analysis of polymorphisms: finding pathways or biological processes associated to traits in genome-wide association studies}, journal = {Nucl. Acids Res.}, volume = {37}, number = {suppl_2}, year = {2009}, pages = {W340-344}, abstract = {

Genome-wide association studies have become a popular strategy to find associations of genes to traits of interest. Despite the high-resolution available today to carry out genotyping studies, the success of its application in real studies has been limited by the testing strategy used. As an alternative to brute force solutions involving the use of very large cohorts, we propose the use of the Gene Set Analysis (GSA), a different analysis strategy based on testing the association of modules of functionally related genes. We show here how the Gene Set-based Analysis of Polymorphisms (GeSBAP), which is a simple implementation of the GSA strategy for the analysis of genome-wide association studies, provides a significant increase in the power testing for this type of studies. GeSBAP is freely available at http://bioinfo.cipf.es/gesbap/

}, keywords = {babelomics, gene set, GESBAP, pathway-based analysis, SNP}, doi = {10.1093/nar/gkp481}, url = {http://nar.oxfordjournals.org/cgi/content/abstract/37/suppl_2/W340}, author = {Medina, Ignacio and Montaner, David and Bonifaci, N{\'u}ria and Pujana, Miguel Angel and Carbonell, Jos{\'e} and T{\'a}rraga, Joaqu{\'\i}n and Fatima Al-Shahrour and Dopazo, Joaquin} } @article {583, title = {Gene set-based analysis of polymorphisms: finding pathways or biological processes associated to traits in genome-wide association studies.}, journal = {Nucleic Acids Res}, volume = {37}, year = {2009}, month = {2009 Jul}, pages = {W340-4}, abstract = {

Genome-wide association studies have become a popular strategy to find associations of genes to traits of interest. Despite the high-resolution available today to carry out genotyping studies, the success of its application in real studies has been limited by the testing strategy used. As an alternative to brute force solutions involving the use of very large cohorts, we propose the use of the Gene Set Analysis (GSA), a different analysis strategy based on testing the association of modules of functionally related genes. We show here how the Gene Set-based Analysis of Polymorphisms (GeSBAP), which is a simple implementation of the GSA strategy for the analysis of genome-wide association studies, provides a significant increase in the power testing for this type of studies. GeSBAP is freely available at http://bioinfo.cipf.es/gesbap/.

}, keywords = {Biological Phenomena, Breast Neoplasms, Female, Genes, Genetic Variation, Genome-Wide Association Study, Humans, Polymorphism, Single Nucleotide, Software, User-Computer Interface}, issn = {1362-4962}, doi = {10.1093/nar/gkp481}, author = {Medina, Ignacio and Montaner, David and Bonifaci, N{\'u}ria and Pujana, Miguel Angel and Carbonell, Jos{\'e} and T{\'a}rraga, Joaqu{\'\i}n and Al-Shahrour, F{\'a}tima and Dopazo, Joaquin} } @article {18515841, title = {Babelomics: advanced functional profiling of transcriptomics, proteomics and genomics experiments}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, note = {

Al-Shahrour, Fatima Carbonell, Jose Minguez, Pablo Goetz, Stefan Conesa, Ana Tarraga, Joaquin Medina, Ignacio Alloza, Eva Montaner, David Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2008 Jul 1;36(Web Server issue):W341-6. Epub 2008 May 31.

}, pages = {W341-6}, abstract = {

We present a new version of Babelomics, a complete suite of web tools for the functional profiling of genome scale experiments, with new and improved methods as well as more types of functional definitions. Babelomics includes different flavours of conventional functional enrichment methods as well as more advanced gene set analysis methods that makes it a unique tool among the similar resources available. In addition to the well-known functional definitions (GO, KEGG), Babelomics includes new ones such as Biocarta pathways or text mining-derived functional terms. Regulatory modules implemented include transcriptional control (Transfac, CisRed) and other levels of regulation such as miRNA-mediated interference. Moreover, Babelomics allows for sub-selection of terms in order to test more focused hypothesis. Also gene annotation correspondence tables can be imported, which allows testing with user-defined functional modules. Finally, a tool for the {\textquoteleft}de novo{\textquoteright} functional annotation of sequences has been included in the system. This allows using yet unannotated organisms in the program. Babelomics has been extensively re-engineered and now it includes the use of web services and Web 2.0 technology features, a new user interface with persistent sessions and a new extended database of gene identifiers. Babelomics is available at http://www.babelomics.org.

}, keywords = {babelomics, functional profiling}, url = {http://nar.oxfordjournals.org/content/36/suppl_2/W341.long}, author = {Fatima Al-Shahrour and Carbonell, J. and Minguez, P. and Goetz, S. and A. Conesa and Tarraga, J. and Medina, Ignacio and Alloza, E. and Montaner, D. and Dopazo, J.} } @article {19094230, title = {Biological processes, properties and molecular wiring diagrams of candidate low-penetrance breast cancer susceptibility genes}, journal = {BMC Med Genomics}, volume = {1}, year = {2008}, note = {

Bonifaci, Nuria Berenguer, Antoni Diez, Javier Reina, Oscar Medina, Ignacio Dopazo, Joaquin Moreno, Victor Pujana, Miguel Angel England BMC medical genomics BMC Med Genomics. 2008 Dec 18;1:62.

}, pages = {62}, abstract = {

ABSTRACT: BACKGROUND: Recent advances in whole-genome association studies (WGASs) for human cancer risk are beginning to provide the part lists of low-penetrance susceptibility genes. However, statistical analysis in these studies is complicated by the vast number of genetic variants examined and the weak effects observed, as a result of which constraints must be incorporated into the study design and analytical approach. In this scenario, biological attributes beyond the adjusted statistics generally receive little attention and, more importantly, the fundamental biological characteristics of low-penetrance susceptibility genes have yet to be determined. METHODS: We applied an integrative approach for identifying candidate low-penetrance breast cancer susceptibility genes, their characteristics and molecular networks through the analysis of diverse sources of biological evidence. RESULTS: First, examination of the distribution of Gene Ontology terms in ordered WGAS results identified asymmetrical distribution of Cell Communication and Cell Death processes linked to risk. Second, analysis of 11 different types of molecular or functional relationships in genomic and proteomic data sets defined the ``omic'' properties of candidate genes: i/ differential expression in tumors relative to normal tissue; ii/ somatic genomic copy number changes correlating with gene expression levels; iii/ differentially expressed across age at diagnosis; and iv/ expression changes after BRCA1 perturbation. Finally, network modeling of the effects of variants on germline gene expression showed higher connectivity than expected by chance between novel candidates and with known susceptibility genes, which supports functional relationships and provides mechanistic hypotheses of risk.
CONCLUSION: This study proposes that cell communication and cell death are major biological processes perturbed in risk of breast cancer conferred by low-penetrance variants, and defines the common omic properties, molecular interactions and possible functional effects of candidate genes and proteins.

}, keywords = {gene set, GWAS, SNP}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=19094230}, author = {Bonifaci, N. and Berenguer, A. and Diez, J. and Reina, O. and Medina, Ignacio and Dopazo, J. and Moreno, V. and Pujana, M. A.} } @article {593, title = {GEPAS, a web-based tool for microarray data analysis and interpretation.}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, month = {2008 Jul 01}, pages = {W308-14}, abstract = {

Gene Expression Profile Analysis Suite (GEPAS) is one of the most complete and extensively used web-based packages for microarray data analysis. During its more than 5 years of activity it has continuously been updated to keep pace with the state-of-the-art in the changing microarray data analysis arena. GEPAS offers diverse analysis options that include well established as well as novel algorithms for normalization, gene selection, class prediction, clustering and functional profiling of the experiment. New options for time-course (or dose-response) experiments, microarray-based class prediction, new clustering methods and new tests for differential expression have been included. The new pipeliner module allows automating the execution of sequential analysis steps by means of a simple but powerful graphic interface. An extensive re-engineering of GEPAS has been carried out which includes the use of web services and Web 2.0 technology features, a new user interface with persistent sessions and a new extended database of gene identifiers. GEPAS is nowadays the most quoted web tool in its field and it is extensively used by researchers of many countries and its records indicate an average usage rate of 500 experiments per day. GEPAS, is available at http://www.gepas.org.

}, keywords = {Computer Graphics, Dose-Response Relationship, Drug, Gene Expression Profiling, Internet, Kinetics, Oligonucleotide Array Sequence Analysis, Software}, issn = {1362-4962}, doi = {10.1093/nar/gkn303}, author = {T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Carbonell, Jos{\'e} and Huerta-Cepas, Jaime and Minguez, Pablo and Alloza, Eva and Al-Shahrour, F{\'a}tima and Vegas-Azc{\'a}rate, Susana and Goetz, Stefan and Escobar, Pablo and Garcia-Garcia, Francisco and Conesa, Ana and Montaner, David and Dopazo, Joaquin} } @article {18508806, title = {GEPAS, a web-based tool for microarray data analysis and interpretation}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, note = {

Tarraga, Joaquin Medina, Ignacio Carbonell, Jose Huerta-Cepas, Jaime Minguez, Pablo Alloza, Eva Al-Shahrour, Fatima Vegas-Azcarate, Susana Goetz, Stefan Escobar, Pablo Garcia-Garcia, Francisco Conesa, Ana Montaner, David Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2008 Jul 1;36(Web Server issue):W308-14. Epub 2008 May 28.

}, pages = {W308-14}, abstract = {

Gene Expression Profile Analysis Suite (GEPAS) is one of the most complete and extensively used web-based packages for microarray data analysis. During its more than 5 years of activity it has continuously been updated to keep pace with the state-of-the-art in the changing microarray data analysis arena. GEPAS offers diverse analysis options that include well established as well as novel algorithms for normalization, gene selection, class prediction, clustering and functional profiling of the experiment. New options for time-course (or dose-response) experiments, microarray-based class prediction, new clustering methods and new tests for differential expression have been included. The new pipeliner module allows automating the execution of sequential analysis steps by means of a simple but powerful graphic interface. An extensive re-engineering of GEPAS has been carried out which includes the use of web services and Web 2.0 technology features, a new user interface with persistent sessions and a new extended database of gene identifiers. GEPAS is nowadays the most quoted web tool in its field and it is extensively used by researchers of many countries and its records indicate an average usage rate of 500 experiments per day. GEPAS, is available at http://www.gepas.org.

}, keywords = {gepas, microarray data analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18508806}, author = {Tarraga, J. and Medina, Ignacio and Carbonell, J. and Huerta-Cepas, J. and Minguez, P. and Alloza, E. and Fatima Al-Shahrour and Vegas-Azcarate, S. and Goetz, S. and Escobar, P. and Garcia-Garcia, F. and A. Conesa and Montaner, D. and Dopazo, J.} } @article {596, title = {Joint annotation of coding and non-coding single nucleotide polymorphisms and mutations in the SNPeffect and PupaSuite databases.}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, month = {2008 Jan}, pages = {D825-9}, abstract = {

Single nucleotide polymorphisms (SNPs) are, together with copy number variation, the primary source of variation in the human genome. SNPs are associated with altered response to drug treatment, susceptibility to disease and other phenotypic variation. Furthermore, during genetic screens for disease-associated mutations in groups of patients and control individuals, the distinction between disease causing mutation and polymorphism is often unclear. Annotation of the functional and structural implications of single nucleotide changes thus provides valuable information to interpret and guide experiments. The SNPeffect and PupaSuite databases are now synchronized to deliver annotations for both non-coding and coding SNP, as well as annotations for the SwissProt set of human disease mutations. In addition, SNPeffect now contains predictions of Tango2: an improved aggregation detector, and Waltz: a novel predictor of amyloid-forming sequences, as well as improved predictors for regions that are recognized by the Hsp70 family of chaperones. The new PupaSuite version incorporates predictions for SNPs in silencers and miRNAs including their targets, as well as additional methods for predicting SNPs in TFBSs and splice sites. Also predictions for mouse and rat genomes have been added. In addition, a PupaSuite web service has been developed to enable data access, programmatically. The combined database holds annotations for 4,965,073 regulatory as well as 133,505 coding human SNPs and 14,935 disease mutations, and phenotypic descriptions of 43,797 human proteins and is accessible via http://snpeffect.vib.be and http://pupasuite.bioinfo.cipf.es/.

}, keywords = {Amino Acid Substitution, Animals, Databases, Genetic, Genetic Diseases, Inborn, HSP70 Heat-Shock Proteins, Humans, Internet, Mice, MicroRNAs, mutation, Polymorphism, Single Nucleotide, Proteins, Rats, RNA Splice Sites, Transcription Factors}, issn = {1362-4962}, doi = {10.1093/nar/gkm979}, author = {Reumers, Joke and Conde, Lucia and Medina, Ignacio and Maurer-Stroh, Sebastian and Van Durme, Joost and Dopazo, Joaquin and Rousseau, Frederic and Schymkowitz, Joost} } @article {18086700, title = {Joint annotation of coding and non-coding single nucleotide polymorphisms and mutations in the SNPeffect and PupaSuite databases}, journal = {Nucleic Acids Res}, volume = {36}, number = {Database issue}, year = {2008}, note = {

Reumers, Joke Conde, Lucia Medina, Ignacio Maurer-Stroh, Sebastian Van Durme, Joost Dopazo, Joaquin Rousseau, Frederic Schymkowitz, Joost Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2008 Jan;36(Database issue):D825-9. Epub 2007 Dec 17.

}, pages = {D825-9}, abstract = {

Single nucleotide polymorphisms (SNPs) are, together with copy number variation, the primary source of variation in the human genome. SNPs are associated with altered response to drug treatment, susceptibility to disease and other phenotypic variation. Furthermore, during genetic screens for disease-associated mutations in groups of patients and control individuals, the distinction between disease causing mutation and polymorphism is often unclear. Annotation of the functional and structural implications of single nucleotide changes thus provides valuable information to interpret and guide experiments. The SNPeffect and PupaSuite databases are now synchronized to deliver annotations for both non-coding and coding SNP, as well as annotations for the SwissProt set of human disease mutations. In addition, SNPeffect now contains predictions of Tango2: an improved aggregation detector, and Waltz: a novel predictor of amyloid-forming sequences, as well as improved predictors for regions that are recognized by the Hsp70 family of chaperones. The new PupaSuite version incorporates predictions for SNPs in silencers and miRNAs including their targets, as well as additional methods for predicting SNPs in TFBSs and splice sites. Also predictions for mouse and rat genomes have been added. In addition, a PupaSuite web service has been developed to enable data access, programmatically. The combined database holds annotations for 4,965,073 regulatory as well as 133,505 coding human SNPs and 14,935 disease mutations, and phenotypic descriptions of 43,797 human proteins and is accessible via http://snpeffect.vib.be and http://pupasuite.bioinfo.cipf.es/.

}, keywords = {Amino Acid Substitution Animals *Databases, Genetic Genetic Diseases, Inborn/genetics HSP70 Heat-Shock Proteins/metabolism Humans Internet Mice MicroRNAs/metabolism *Mutation *Polymorphism, Single Nucleotide Proteins/chemistry/genetics RNA Splice Sites Rats Transcription Factors/metabolism}, url = {http://nar.oxfordjournals.org/cgi/content/full/36/suppl_1/D825}, author = {Reumers, J. and L. Conde and Medina, Ignacio and Maurer-Stroh, S. and Van Durme, J. and Dopazo, J. and Rousseau, F. and Schymkowitz, J.} } @article {599, title = {SNP and haplotype mapping for genetic analysis in the rat.}, journal = {Nat Genet}, volume = {40}, year = {2008}, month = {2008 May}, pages = {560-6}, abstract = {

The laboratory rat is one of the most extensively studied model organisms. Inbred laboratory rat strains originated from limited Rattus norvegicus founder populations, and the inherited genetic variation provides an excellent resource for the correlation of genotype to phenotype. Here, we report a survey of genetic variation based on almost 3 million newly identified SNPs. We obtained accurate and complete genotypes for a subset of 20,238 SNPs across 167 distinct inbred rat strains, two rat recombinant inbred panels and an F2 intercross. Using 81\% of these SNPs, we constructed high-density genetic maps, creating a large dataset of fully characterized SNPs for disease gene mapping. Our data characterize the population structure and illustrate the degree of linkage disequilibrium. We provide a detailed SNP map and demonstrate its utility for mapping of quantitative trait loci. This community resource is openly available and augments the genetic tools for this workhorse of physiological studies.

}, keywords = {Animals, Chromosome Mapping, Databases, Genetic, Genome, Haplotypes, Linkage Disequilibrium, Phylogeny, Polymorphism, Single Nucleotide, Quantitative Trait Loci, Rats, Rats, Inbred Strains, Recombination, Genetic}, issn = {1546-1718}, doi = {10.1038/ng.124}, author = {Saar, Kathrin and Beck, Alfred and Bihoreau, Marie-Th{\'e}r{\`e}se and Birney, Ewan and Brocklebank, Denise and Chen, Yuan and Cuppen, Edwin and Demonchy, Stephanie and Dopazo, Joaquin and Flicek, Paul and Foglio, Mario and Fujiyama, Asao and Gut, Ivo G and Gauguier, Dominique and Guig{\'o}, Roderic and Guryev, Victor and Heinig, Matthias and Hummel, Oliver and Jahn, Niels and Klages, Sven and Kren, Vladimir and Kube, Michael and Kuhl, Heiner and Kuramoto, Takashi and Kuroki, Yoko and Lechner, Doris and Lee, Young-Ae and Lopez-Bigas, Nuria and Lathrop, G Mark and Mashimo, Tomoji and Medina, Ignacio and Mott, Richard and Patone, Giannino and Perrier-Cornet, Jeanne-Antide and Platzer, Matthias and Pravenec, Michal and Reinhardt, Richard and Sakaki, Yoshiyuki and Schilhabel, Markus and Schulz, Herbert and Serikawa, Tadao and Shikhagaie, Medya and Tatsumoto, Shouji and Taudien, Stefan and Toyoda, Atsushi and Voigt, Birger and Zelenika, Diana and Zimdahl, Heike and Hubner, Norbert} } @article {18443594, title = {SNP and haplotype mapping for genetic analysis in the rat}, journal = {Nat Genet}, volume = {40}, number = {5}, year = {2008}, note = {

STAR Consortium Saar, Kathrin Beck, Alfred Bihoreau, Marie-Therese Birney, Ewan Brocklebank, Denise Chen, Yuan Cuppen, Edwin Demonchy, Stephanie Dopazo, Joaquin Flicek, Paul Foglio, Mario Fujiyama, Asao Gut, Ivo G Gauguier, Dominique Guigo, Roderic Guryev, Victor Heinig, Matthias Hummel, Oliver Jahn, Niels Klages, Sven Kren, Vladimir Kube, Michael Kuhl, Heiner Kuramoto, Takashi Kuroki, Yoko Lechner, Doris Lee, Young-Ae Lopez-Bigas, Nuria Lathrop, G Mark Mashimo, Tomoji Medina, Ignacio Mott, Richard Patone, Giannino Perrier-Cornet, Jeanne-Antide Platzer, Matthias Pravenec, Michal Reinhardt, Richard Sakaki, Yoshiyuki Schilhabel, Markus Schulz, Herbert Serikawa, Tadao Shikhagaie, Medya Tatsumoto, Shouji Taudien, Stefan Toyoda, Atsushi Voigt, Birger Zelenika, Diana Zimdahl, Heike Hubner, Norbert 057733/Z/99/A/Wellcome Trust/United Kingdom 066780/Z/01/Z/Wellcome Trust/United Kingdom Research Support, Non-U.S. Gov{\textquoteright}t Technical Report United States Nature genetics Nat Genet. 2008 May;40(5):560-6.

}, pages = {560-6}, abstract = {

The laboratory rat is one of the most extensively studied model organisms. Inbred laboratory rat strains originated from limited Rattus norvegicus founder populations, and the inherited genetic variation provides an excellent resource for the correlation of genotype to phenotype. Here, we report a survey of genetic variation based on almost 3 million newly identified SNPs. We obtained accurate and complete genotypes for a subset of 20,238 SNPs across 167 distinct inbred rat strains, two rat recombinant inbred panels and an F2 intercross. Using 81\% of these SNPs, we constructed high-density genetic maps, creating a large dataset of fully characterized SNPs for disease gene mapping. Our data characterize the population structure and illustrate the degree of linkage disequilibrium. We provide a detailed SNP map and demonstrate its utility for mapping of quantitative trait loci. This community resource is openly available and augments the genetic tools for this workhorse of physiological studies.

}, keywords = {Animals Chromosome Mapping *Databases, Genetic, Genetic Genome *Haplotypes Linkage Disequilibrium Phylogeny *Polymorphism, Inbred Strains/*genetics Recombination, Single Nucleotide *Quantitative Trait Loci Rats/*genetics Rats}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18443594}, author = {K. Saar and A. Beck and M. T. Bihoreau and E. Birney and D. Brocklebank and Y. Chen and E. Cuppen and S. Demonchy and Dopazo, J. and P. Flicek and M. Foglio and A. Fujiyama and I. G. Gut and D. Gauguier and R. Guigo and V. Guryev and M. Heinig and O. Hummel and N. Jahn and S. Klages and V. Kren and M. Kube and H. Kuhl and Kuramoto, T. and Kuroki, Y. and Lechner, D. and Lee, Y. A. and Lopez-Bigas, N. and Lathrop, G. M. and Mashimo, T. and Medina, Ignacio and Mott, R. and Patone, G. and Perrier-Cornet, J. A. and Platzer, M. and Pravenec, M. and Reinhardt, R. and Sakaki, Y. and Schilhabel, M. and Schulz, H. and Serikawa, T. and Shikhagaie, M. and Tatsumoto, S. and Taudien, S. and Toyoda, A. and Voigt, B. and Zelenika, D. and Zimdahl, H. and Hubner, N.} } @article {17478504, title = {FatiGO +: a functional profiling tool for genomic data. Integration of functional annotation, regulatory motifs and interaction data with microarray experiments}, journal = {Nucleic Acids Res}, volume = {35}, number = {Web Server issue}, year = {2007}, note = {

Al-Shahrour, Fatima Minguez, Pablo Tarraga, Joaquin Medina, Ignacio Alloza, Eva Montaner, David Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2007 Jul;35(Web Server issue):W91-6. Epub 2007 May 3.

}, pages = {W91-6}, abstract = {

The ultimate goal of any genome-scale experiment is to provide a functional interpretation of the data, relating the available information with the hypotheses that originated the experiment. Thus, functional profiling methods have become essential in diverse scenarios such as microarray experiments, proteomics, etc. We present the FatiGO+, a web-based tool for the functional profiling of genome-scale experiments, specially oriented to the interpretation of microarray experiments. In addition to different functional annotations (gene ontology, KEGG pathways, Interpro motifs, Swissprot keywords and text-mining based bioentities related to diseases and chemical compounds) FatiGO+ includes, as a novelty, regulatory and structural information. The regulatory information used includes predictions of targets for distinct regulatory elements (obtained from the Transfac and CisRed databases). Additionally FatiGO+ uses predictions of target motifs of miRNA to infer which of these can be activated or deactivated in the sample of genes studied. Finally, properties of gene products related to their relative location and connections in the interactome have also been used. Also, enrichment of any of these functional terms can be directly analysed on chromosomal coordinates. FatiGO+ can be found at: http://www.fatigoplus.org and within the Babelomics environment http://www.babelomics.org.

}, keywords = {babelomics, functional enrichment analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17478504}, author = {Fatima Al-Shahrour and Minguez, P. and Tarraga, J. and Medina, Ignacio and Alloza, E. and Montaner, D. and Dopazo, J.} } @article {605, title = {FatiGO +: a functional profiling tool for genomic data. Integration of functional annotation, regulatory motifs and interaction data with microarray experiments.}, journal = {Nucleic Acids Res}, volume = {35}, year = {2007}, month = {2007 Jul}, pages = {W91-6}, abstract = {

The ultimate goal of any genome-scale experiment is to provide a functional interpretation of the data, relating the available information with the hypotheses that originated the experiment. Thus, functional profiling methods have become essential in diverse scenarios such as microarray experiments, proteomics, etc. We present the FatiGO+, a web-based tool for the functional profiling of genome-scale experiments, specially oriented to the interpretation of microarray experiments. In addition to different functional annotations (gene ontology, KEGG pathways, Interpro motifs, Swissprot keywords and text-mining based bioentities related to diseases and chemical compounds) FatiGO+ includes, as a novelty, regulatory and structural information. The regulatory information used includes predictions of targets for distinct regulatory elements (obtained from the Transfac and CisRed databases). Additionally FatiGO+ uses predictions of target motifs of miRNA to infer which of these can be activated or deactivated in the sample of genes studied. Finally, properties of gene products related to their relative location and connections in the interactome have also been used. Also, enrichment of any of these functional terms can be directly analysed on chromosomal coordinates. FatiGO+ can be found at: http://www.fatigoplus.org and within the Babelomics environment http://www.babelomics.org.

}, keywords = {Amino Acid Motifs, Animals, Binding Sites, Computational Biology, Gene Expression Profiling, Genes, Genomics, Humans, Internet, Oligonucleotide Array Sequence Analysis, Programming Languages, Software, Systems Integration, Transcription Factors}, issn = {1362-4962}, doi = {10.1093/nar/gkm260}, author = {Al-Shahrour, F{\'a}tima and Minguez, Pablo and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Alloza, Eva and Montaner, David and Dopazo, Joaquin} } @article {608, title = {ISACGH: a web-based environment for the analysis of Array CGH and gene expression which includes functional profiling.}, journal = {Nucleic Acids Res}, volume = {35}, year = {2007}, month = {2007 Jul}, pages = {W81-5}, abstract = {

We present the ISACGH, a web-based system that allows for the combination of genomic data with gene expression values and provides different options for functional profiling of the regions found. Several visualization options offer a convenient representation of the results. Different efficient methods for accurate estimation of genomic copy number from array-CGH hybridization data have been included in the program. Moreover, the connection to the gene expression analysis package GEPAS allows the use of different facilities for data pre-processing and analysis. A DAS server allows exporting the results to the Ensembl viewer where contextual genomic information can be obtained. The program is freely available at: http://isacgh.bioinfo.cipf.es or within http://www.gepas.org.

}, keywords = {Animals, Cluster Analysis, Computational Biology, Computer Graphics, Gene Expression Profiling, Humans, Internet, Models, Genetic, Nucleic Acid Hybridization, Oligonucleotide Array Sequence Analysis, Programming Languages, Software, Systems Integration, User-Computer Interface}, issn = {1362-4962}, doi = {10.1093/nar/gkm257}, author = {Conde, Lucia and Montaner, David and Burguet-Castell, Jordi and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Al-Shahrour, F{\'a}tima and Dopazo, Joaquin} }

% ISACGH web server paper (Nucleic Acids Res, 2007 Web Server issue).
% The citation key is the PubMed ID that also appears in the url field.
% annote holds the raw PubMed citation record verbatim; note carries only the
% Epub date so that styles do not print the whole record after the reference.
% NOTE(review): the entry immediately above has the same author list and
% keyword set; it may be a duplicate record of this paper. Confirm and merge.
@article{17468499,
  author   = {Conde, Lucia and Montaner, David and Burguet-Castell, Jordi and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Al-Shahrour, F{\'a}tima and Dopazo, Joaquin},
  title    = {{ISACGH}: a web-based environment for the analysis of Array {CGH} and gene expression which includes functional profiling},
  journal  = {Nucleic Acids Res},
  volume   = {35},
  number   = {Web Server issue},
  pages    = {W81--W85},
  year     = {2007},
  note     = {Epub 2007 Apr 27},
  annote   = {Conde, Lucia Montaner, David Burguet-Castell, Jordi Tarraga, Joaquin Medina, Ignacio Al-Shahrour, Fatima Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2007 Jul;35(Web Server issue):W81-5. Epub 2007 Apr 27.},
  abstract = {We present the ISACGH, a web-based system that allows for the combination of genomic data with gene expression values and provides different options for functional profiling of the regions found. Several visualization options offer a convenient representation of the results. Different efficient methods for accurate estimation of genomic copy number from array-CGH hybridization data have been included in the program. Moreover, the connection to the gene expression analysis package GEPAS allows the use of different facilities for data pre-processing and analysis. A DAS server allows exporting the results to the Ensembl viewer where contextual genomic information can be obtained. The program is freely available at: http://isacgh.bioinfo.cipf.es or within http://www.gepas.org.},
  keywords = {Animals, Cluster Analysis, Computational Biology, Computer Graphics, Gene Expression Profiling, Humans, Internet, Models, Genetic, Nucleic Acid Hybridization, Oligonucleotide Array Sequence Analysis, Programming Languages, Software, Systems Integration, User-Computer Interface},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17468499}
}

% Phylemon web server paper (Nucleic Acids Res, 2007 Web Server issue).
% The citation key is the PubMed ID that also appears in the url field.
% annote holds the raw PubMed citation record verbatim; note carries only the
% Epub date. Keywords are MeSH headings, comma-separated like the rest of the file.
@article{17452346,
  author   = {T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Arbiza, Leonardo and Huerta-Cepas, Jaime and Gabald{\'o}n, Toni and Dopazo, Joaquin and Dopazo, Hernan},
  title    = {{Phylemon}: a suite of web tools for molecular evolution, phylogenetics and phylogenomics},
  journal  = {Nucleic Acids Res},
  volume   = {35},
  number   = {Web Server issue},
  pages    = {W38--W42},
  year     = {2007},
  note     = {Epub 2007 Apr 22},
  annote   = {Tarraga, Joaquin Medina, Ignacio Arbiza, Leonardo Huerta-Cepas, Jaime Gabaldon, Toni Dopazo, Joaquin Dopazo, Hernan Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2007 Jul;35(Web Server issue):W38-42. Epub 2007 Apr 22.},
  abstract = {Phylemon is an online platform for phylogenetic and evolutionary analyses of molecular sequence data. It has been developed as a web server that integrates a suite of different tools selected among the most popular stand-alone programs in phylogenetic and evolutionary analysis. It has been conceived as a natural response to the increasing demand of data analysis of many experimental scientists wishing to add a molecular evolution and phylogenetics insight into their research. Tools included in Phylemon cover a wide yet selected range of programs: from the most basic for multiple sequence alignment to elaborate statistical methods of phylogenetic reconstruction including methods for evolutionary rates analyses and molecular adaptation. Phylemon has several features that differentiates it from other resources: (i) It offers an integrated environment that enables the direct concatenation of evolutionary analyses, the storage of results and handles required data format conversions, (ii) Once an outfile is produced, Phylemon suggests the next possible analyses, thus guiding the user and facilitating the integration of multi-step analyses, and (iii) users can define and save complete pipelines for specific phylogenetic analysis to be automatically used on many genes in subsequent sessions or multiple genes in a single session (phylogenomics). The Phylemon web server is available at http://phylemon.bioinfo.cipf.es.},
  keywords = {Animals, Computational Biology, Databases, Genetic, Evolution, Molecular, Genetic Techniques, Humans, Internet, Models, Statistical, Phylogeny, Programming Languages, Sequence Alignment, Sequence Analysis, DNA, Sequence Analysis, Protein, Software, User-Computer Interface},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17452346}
}

% Prophet class-prediction web tool paper (Bioinformatics 23(3), 2007).
% The citation key matches the pmid parameter in the url field.
% annote holds the raw PubMed citation record verbatim; note carries only the
% Epub date.
@article{17138587,
  author   = {Medina, Ignacio and Montaner, David and T{\'a}rraga, Joaqu{\'\i}n and Dopazo, Joaquin},
  title    = {{Prophet}, a web-based tool for class prediction using microarray data},
  journal  = {Bioinformatics},
  volume   = {23},
  number   = {3},
  pages    = {390--391},
  year     = {2007},
  note     = {Epub 2006 Nov 30},
  annote   = {Medina, Ignacio Montaner, David Tarraga, Joaquin Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2007 Feb 1;23(3):390-1. Epub 2006 Nov 30.},
  abstract = {Sample classification and class prediction is the aim of many gene expression studies. We present a web-based application, Prophet, which builds prediction rules and allows using them for further sample classification. Prophet automatically chooses the best classifier, along with the optimal selection of genes, using a strategy that renders unbiased cross-validated errors. Prophet is linked to different microarray data analysis modules, and includes a unique feature: the possibility of performing the functional interpretation of the molecular signature found. Availability: Prophet can be found at the URL http://prophet.bioinfo.cipf.es/ or within the GEPAS package at http://www.gepas.org/ Supplementary information: http://gepas.bioinfo.cipf.es/tutorial/prophet.html.},
  keywords = {babelomics, gepas, predictors},
  url      = {http://bioinformatics.oxfordjournals.org/cgi/content/full/23/3/390?view=long\&pmid=17138587}
}