@article{744,
  author   = {Ortuno, Francisco M and Loucera, Carlos and Casimiro-Soriguer, Carlos S and Lepe, Jose A and Camacho Martinez, Pedro and Merino Diaz, Laura and de Salazar, Adolfo and Chueca, Natalia and Garc{\'\i}a, Federico and Perez-Florido, Javier and Dopazo, Joaquin},
  title    = {Highly accurate whole-genome imputation of {SARS-CoV-2} from partial or low-quality sequences},
  journal  = {Gigascience},
  volume   = {10},
  year     = {2021},
  month    = dec,
  day      = {2},
  issn     = {2047-217X},
  doi      = {10.1093/gigascience/giab078},
  keywords = {Genome, Viral, Phylogeny, SARS-CoV-2, Whole Genome Sequencing},
  abstract = {BACKGROUND: The current SARS-CoV-2 pandemic has emphasized the utility of viral whole-genome sequencing in the surveillance and control of the pathogen. An unprecedented ongoing global initiative is producing hundreds of thousands of sequences worldwide. However, the complex circumstances in which viruses are sequenced, along with the demand of urgent results, causes a high rate of incomplete and, therefore, useless sequences. Viral sequences evolve in the context of a complex phylogeny and different positions along the genome are in linkage disequilibrium. Therefore, an imputation method would be able to predict missing positions from the available sequencing data.
RESULTS: We have developed the impuSARS application, which takes advantage of the enormous number of SARS-CoV-2 genomes available, using a reference panel containing 239,301 sequences, to produce missing data imputation in viral genomes. ImpuSARS was tested in a wide range of conditions (continuous fragments, amplicons or sparse individual positions missing), showing great fidelity when reconstructing the original sequences, recovering the lineage with a 100\% precision for almost all the lineages, even in very poorly covered genomes (<20\%).
CONCLUSIONS: Imputation can improve the pace of SARS-CoV-2 sequencing production by recovering many incomplete or low-quality sequences that would be otherwise discarded. ImpuSARS can be incorporated in any primary data processing pipeline for SARS-CoV-2 whole-genome sequencing.},
}

@article{387,
  author   = {Lopez, Javier and Coll, Jacobo and Haimel, Matthias and Kandasamy, Swaathi and T{\'a}rraga, Joaqu{\'\i}n and Furio-Tari, Pedro and Bari, Wasim and Bleda, Marta and Rueda, Antonio and Gr{\"a}f, Stefan and Rendon, Augusto and Dopazo, Joaquin and Medina, Ignacio},
  title    = {{HGVA}: the {Human Genome Variation Archive}},
  journal  = {Nucleic Acids Res},
  volume   = {45},
  pages    = {W189--W194},
  year     = {2017},
  month    = jul,
  day      = {3},
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkx445},
  url      = {https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkx445},
  keywords = {Genetic Variation, Genome, Human, Humans, Internet, Software, User-Computer Interface},
  abstract = {High-profile genomic variation projects like the 1000 Genomes project or the Exome Aggregation Consortium, are generating a wealth of human genomic variation knowledge which can be used as an essential reference for identifying disease-causing genotypes. However, accessing these data, contrasting the various studies and integrating those data in downstream analyses remains cumbersome. The Human Genome Variation Archive (HGVA) tackles these challenges and facilitates access to genomic data for key reference projects in a clean, fast and integrated fashion. HGVA provides an efficient and intuitive web-interface for easy data mining, a comprehensive RESTful API and client libraries in Python, Java and JavaScript for fast programmatic access to its knowledge base. HGVA calculates population frequencies for these projects and enriches their data with variant annotation provided by CellBase, a rich and fast annotation solution. HGVA serves as a proof-of-concept of the genome analysis developments being carried out by the University of Cambridge together with UK{\textquoteright}s 100 000 genomes project and the National Institute for Health Research BioResource Rare-Diseases, in particular, deploying open-source for Computational Biology (OpenCB) software platform for storing and analyzing massive genomic datasets.},
}

@article{434,
  author   = {Hidalgo, Marta R and Cubuk, Cankut and Amadoz, Alicia and Salavert, Francisco and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin},
  title    = {High throughput estimation of functional cell activities reveals disease mechanisms and predicts relevant clinical outcomes},
  journal  = {Oncotarget},
  volume   = {8},
  pages    = {5160--5178},
  year     = {2017},
  month    = jan,
  day      = {17},
  issn     = {1949-2553},
  doi      = {10.18632/oncotarget.14107},
  keywords = {Computational Biology, gene expression, Gene Regulatory Networks, Humans, mutation, Neoplasms, Precision Medicine, Sequence Analysis, RNA, Signal Transduction},
  abstract = {Understanding the aspects of the cell functionality that account for disease or drug action mechanisms is a main challenge for precision medicine. Here we propose a new method that models cell signaling using biological knowledge on signal transduction. The method recodes individual gene expression values (and/or gene mutations) into accurate measurements of changes in the activity of signaling circuits, which ultimately constitute high-throughput estimations of cell functionalities caused by gene activity within the pathway. Moreover, such estimations can be obtained either at cohort-level, in case/control comparisons, or personalized for individual patients. The accuracy of the method is demonstrated in an extensive analysis involving 5640 patients from 12 different cancer types. Circuit activity measurements not only have a high diagnostic value but also can be related to relevant disease outcomes such as survival, and can be used to assess therapeutic interventions.},
}

@article{441,
  author   = {Medina, I and T{\'a}rraga, J and Mart{\'\i}nez, H and Barrachina, S and Castillo, M I and Paschall, J and Salavert-Torres, J and Blanquer-Espert, I and Hern{\'a}ndez-Garc{\'\i}a, V and Quintana-Ort{\'\i}, E S and Dopazo, J},
  title    = {Highly sensitive and ultrafast read mapping for {RNA-seq} analysis},
  journal  = {DNA Res},
  volume   = {23},
  pages    = {93--100},
  year     = {2016},
  month    = apr,
  issn     = {1756-1663},
  doi      = {10.1093/dnares/dsv039},
  keywords = {Genomics, High-Throughput Nucleotide Sequencing, Humans, Sensitivity and Specificity, Sequence Analysis, RNA, Transcriptome},
  abstract = {As sequencing technologies progress, the amount of data produced grows exponentially, shifting the bottleneck of discovery towards the data analysis phase. In particular, currently available mapping solutions for RNA-seq leave room for improvement in terms of sensitivity and performance, hindering an efficient analysis of transcriptomes by massive sequencing. Here, we present an innovative approach that combines re-engineering, optimization and parallelization. This solution results in a significant increase of mapping sensitivity over a wide range of read lengths and substantial shorter runtimes when compared with current RNA-seq mapping methods available.},
}

@article{1195,
  author   = {T{\'a}rraga, Joaqu{\'\i}n and Gallego, Asunci{\'o}n and Arnau, Vicente and Medina, Ignacio and Dopazo, Joaquin},
  title    = {{HPG} pore: an efficient and scalable framework for nanopore sequencing data},
  journal  = {BMC Bioinformatics},
  volume   = {17},
  pages    = {107},
  year     = {2016},
  issn     = {1471-2105},
  doi      = {10.1186/s12859-016-0966-0},
  url      = {http://www.biomedcentral.com/1471-2105/17/107},
  keywords = {hadoop, HPC, nanopore, NGS},
  abstract = {BACKGROUND: The use of nanopore technologies is expected to spread in the future because they are portable and can sequence long fragments of DNA molecules without prior amplification. The first nanopore sequencer available, the MinION{\texttrademark} from Oxford Nanopore Technologies, is a USB-connected, portable device that allows real-time DNA analysis. In addition, other new instruments are expected to be released soon, which promise to outperform the current short-read technologies in terms of throughput. Despite the flood of data expected from this technology, the data analysis solutions currently available are only designed to manage small projects and are not scalable. RESULTS: Here we present HPG Pore, a toolkit for exploring and analysing nanopore sequencing data. HPG Pore can run on both individual computers and in the Hadoop distributed computing framework, which allows easy scale-up to manage the large amounts of data expected to result from extensive use of nanopore technologies in the future. CONCLUSIONS: HPG Pore allows for virtually unlimited sequencing data scalability, thus guaranteeing its continued management in near future scenarios.
HPG Pore is available in GitHub at http://github.com/opencb/hpg-pore .},
}

@comment{Entry 560 describes the same work as entry 1195 (identical DOI). Both keys are kept so that existing citations of either one keep resolving.}

@article{560,
  author   = {T{\'a}rraga, Joaqu{\'\i}n and Gallego, Asunci{\'o}n and Arnau, Vicente and Medina, Ignacio and Dopazo, Joaquin},
  title    = {{HPG} pore: an efficient and scalable framework for nanopore sequencing data},
  journal  = {BMC Bioinformatics},
  volume   = {17},
  pages    = {107},
  year     = {2016},
  doi      = {10.1186/s12859-016-0966-0},
  url      = {http://www.biomedcentral.com/1471-2105/17/107},
}

@article{561,
  author   = {Sanchez-Mut, J V and Heyn, H and Vidal, E and Moran, S and Sayols, S and Delgado-Morales, R and Schultz, M D and Ansoleaga, B and Garcia-Esparcia, P and Pons-Espinal, M and de Lagran, M M and Dopazo, J and Rabano, A and Avila, J and Dierssen, M and Lott, I and Ferrer, I and Ecker, J R and Esteller, M},
  title    = {Human {DNA} methylomes of neurodegenerative diseases show common epigenomic patterns},
  journal  = {Transl Psychiatry},
  volume   = {6},
  pages    = {e718},
  year     = {2016},
  month    = jan,
  day      = {19},
  issn     = {2158-3188},
  doi      = {10.1038/tp.2015.214},
  keywords = {Adult, Aged, Aged, 80 and over, DNA Methylation, Epigenomics, Female, Humans, Male, Middle Aged, neurodegenerative diseases, Prefrontal Cortex, Tissue Array Analysis},
  abstract = {Different neurodegenerative disorders often show similar lesions, such as the presence of amyloid plaques, TAU-neurotangles and synuclein inclusions. The genetically inherited forms are rare, so we wondered whether shared epigenetic aberrations, such as those affecting DNA methylation, might also exist. The studied samples were gray matter samples from the prefrontal cortex of control and neurodegenerative disease-associated cases. We performed the DNA methylation analyses of Alzheimer{\textquoteright}s disease, dementia with Lewy bodies, Parkinson{\textquoteright}s disease and Alzheimer-like neurodegenerative profile associated with Down{\textquoteright}s syndrome samples. The DNA methylation landscapes obtained show that neurodegenerative diseases share similar aberrant CpG methylation shifts targeting a defined gene set. Our findings suggest that neurodegenerative disorders might have similar pathogenetic mechanisms that subsequently evolve into different clinical entities. The identified aberrant DNA methylation changes can be used as biomarkers of the disorders and as potential new targets for the development of new therapies.},
}

@article{21899556,
  author   = {Leida, Carmen and Conesa, Ana and Ll{\'a}cer, Gerardo and Badenes, Mar{\'\i}a Luisa and R{\'\i}os, Gabino},
  title    = {Histone modifications and expression of {DAM6} gene in peach are modulated during bud dormancy release in a cultivar-dependent manner},
  journal  = {The New Phytologist},
  year     = {2011},
  month    = sep,
  day      = {7},
  doi      = {10.1111/j.1469-8137.2011.03863.x},
  abstract = {{\textbullet} Bud dormancy release in many woody perennial plants responds to the seasonal accumulation of chilling stimulus. MADS-box transcription factors encoded by DORMANCY ASSOCIATED MADS-box (DAM) genes in peach (Prunus persica) are implicated in this pathway, but other regulatory factors remain to be identified. In addition, the regulation of DAM gene expression is not well known at the molecular level. {\textbullet} A microarray hybridization approach was performed to identify genes whose expression correlates with the bud dormancy-related behaviour in 10 different peach cultivars. Histone modifications in DAM6 gene were investigated by chromatin immunoprecipitation in two different cultivars. {\textbullet} The expression of DAM4-DAM6 and several genes related to abscisic acid and drought stress response correlated with the dormancy behaviour of peach cultivars. The trimethylation of histone H3 at K27 in the DAM6 promoter, coding region and the second large intron was preceded by a decrease in acetylated H3 and trimethylated H3K4 in the region of translation start, coinciding with repression of DAM6 during dormancy release. {\textbullet} Analysis of chromatin modifications reinforced the role of epigenetic mechanisms in DAM6 regulation and bud dormancy release, and highlighted common features with the vernalization process in Arabidopsis thaliana and cereals.},
}

@article{572,
  author   = {Prado-Lopez, Sonia and Conesa, Ana and Armi{\~n}{\'a}n, Ana and Mart{\'\i}nez-Losa, Magdalena and Escobedo-Lucea, Carmen and Gandia, Carolina and Tarazona, Sonia and Melguizo, Dario and Blesa, David and Montaner, David and Sanz-Gonz{\'a}lez, Silvia and Sep{\'u}lveda, Pilar and G{\"o}tz, Stefan and O'Connor, Jos{\'e} Enrique and Moreno, Ruben and Dopazo, Joaquin and Burks, Deborah J and Stojkovic, Miodrag},
  title    = {Hypoxia promotes efficient differentiation of human embryonic stem cells to functional endothelium},
  journal  = {Stem Cells},
  volume   = {28},
  pages    = {407--18},
  year     = {2010},
  month    = mar,
  day      = {31},
  issn     = {1549-4918},
  doi      = {10.1002/stem.295},
  keywords = {Angiopoietin-1, Animals, biomarkers, Cell Culture Techniques, Cell Differentiation, Cell Hypoxia, Cell Transplantation, Cells, Cultured, Down-Regulation, Embryonic Stem Cells, Endothelial Cells, Gene Expression Profiling, Gene Expression Regulation, Humans, Male, Myocardial Infarction, Neovascularization, Physiologic, Oxygen, Pluripotent Stem Cells, Rats, Rats, Nude, Vascular Endothelial Growth Factor A},
  abstract = {Early development of mammalian embryos occurs in an environment of relative hypoxia. Nevertheless, human embryonic stem cells (hESC), which are derived from the inner cell mass of blastocyst, are routinely cultured under the same atmospheric conditions (21\% O(2)) as somatic cells. We hypothesized that O(2) levels modulate gene expression and differentiation potential of hESC, and thus, we performed gene profiling of hESC maintained under normoxic or hypoxic (1\% or 5\% O(2)) conditions. Our analysis revealed that hypoxia downregulates expression of pluripotency markers in hESC but increases significantly the expression of genes associated with angio- and vasculogenesis including vascular endothelial growth factor and angiopoitein-like proteins. Consequently, we were able to efficiently differentiate hESC to functional endothelial cells (EC) by varying O(2) levels; after 24 hours at 5\% O(2), more than 50\% of cells were CD34+. Transplantation of resulting endothelial-like cells improved both systolic function and fractional shortening in a rodent model of myocardial infarction. Moreover, analysis of the infarcted zone revealed that transplanted EC reduced the area of fibrous scar tissue by 50\%. Thus, use of hypoxic conditions to specify the endothelial lineage suggests a novel strategy for cellular therapies aimed at repair of damaged vasculature in pathologies such as cerebral ischemia and myocardial infarction.},
}

@article{594,
  author   = {G{\"o}tz, Stefan and Garc{\'\i}a-G{\'o}mez, Juan Miguel and Terol, Javier and Williams, Tim D and Nagaraj, Shivashankar H and Nueda, Maria Jos{\'e} and Robles, Montserrat and Talon, Manuel and Dopazo, Joaquin and Conesa, Ana},
  title    = {High-throughput functional annotation and data mining with the {Blast2GO} suite},
  journal  = {Nucleic Acids Res},
  volume   = {36},
  pages    = {3420--35},
  year     = {2008},
  month    = jun,
  issn     = {1362-4962},
  doi      = {10.1093/nar/gkn176},
  keywords = {Animals, Computational Biology, Computer Graphics, Databases, Genetic, Expressed Sequence Tags, Genes, Genomics, Sequence Analysis, DNA, Sequence Analysis, Protein, Software, Vocabulary, Controlled},
  abstract = {Functional genomics technologies have been widely adopted in the biological research of both model and non-model species. An efficient functional annotation of DNA or protein sequences is a major requirement for the successful application of these approaches as functional information on gene products is often the key to the interpretation of experimental results. Therefore, there is an increasing need for bioinformatics resources which are able to cope with large amount of sequence data, produce valuable annotation results and are easily accessible to laboratories where functional genomics projects are being undertaken. We present the Blast2GO suite as an integrated and biologist-oriented solution for the high-throughput and automatic functional annotation of DNA or protein sequences based on the Gene Ontology vocabulary. The most outstanding Blast2GO features are: (i) the combination of various annotation strategies and tools controlling type and intensity of annotation, (ii) the numerous graphical features such as the interactive GO-graph visualization for gene-set function profiling or descriptive charts, (iii) the general sequence management features and (iv) high-throughput capabilities. We used the Blast2GO framework to carry out a detailed analysis of annotation behaviour through homology transfer and its impact in functional genomics research. Our aim is to offer biologists useful information to take into account when addressing the task of functionally characterizing their sequence data.},
}

@article{17567924,
  author   = {Huerta-Cepas, J. and Dopazo, H. and Dopazo, J. and Gabald{\'o}n, T.},
  title    = {The human phylome},
  journal  = {Genome Biol},
  volume   = {8},
  number   = {6},
  pages    = {R109},
  year     = {2007},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17567924},
  keywords = {Animals *Evolution Evolution, DNA, Molecular Gene Duplication *Genome Humans *Phylogeny Proteins/genetics Sequence Analysis},
  annote   = {Huerta-Cepas, Jaime Dopazo, Hernan Dopazo, Joaquin Gabaldon, Toni Research Support, Non-U.S. Gov{\textquoteright}t England Genome biology Genome Biol. 2007;8(6):R109.},
  abstract = {BACKGROUND: Phylogenomics analyses serve to establish evolutionary relationships among organisms and their genes. A phylome, the complete collection of all gene phylogenies in a genome, constitutes a valuable source of information, but its use in large genomes still constitutes a technical challenge. The use of phylomes also requires the development of new methods that help us to interpret them. RESULTS: We reconstruct here the human phylome, which includes the evolutionary relationships of all human proteins and their homologs among 39 fully sequenced eukaryotes. Phylogenetic techniques used include alignment trimming, branch length optimization, evolutionary model testing and maximum likelihood and Bayesian methods. Although differences with alternative topologies are minor, most of the trees support the Coelomata and Unikont hypotheses as well as the grouping of primates with laurasatheria to the exclusion of rodents. We assess the extent of gene duplication events and their relationship with the functional roles of the protein families involved.
We find support for at least one, and probably two, rounds of whole genome duplications before vertebrate radiation. Using a novel algorithm that is independent from a species phylogeny, we derive orthology and paralogy relationships of human proteins among eukaryotic genomes. CONCLUSION: Topological variations among phylogenies for different genes are to be expected, highlighting the danger of gene-sampling effects in phylogenomic analyses. Several links can be established between the functions of gene families duplicated at certain phylogenetic splits and major evolutionary transitions in those lineages. The pipeline implemented here can be easily adapted for use in other organisms.},
}

@article{15608250,
  author   = {Hoffmann, R. and Dopazo, J. and Cigudosa, J. C. and Valencia, A.},
  title    = {{HCAD}, closing the gap between breakpoints and genes},
  journal  = {Nucleic Acids Res},
  volume   = {33},
  number   = {Database issue},
  pages    = {D511--3},
  year     = {2005},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15608250},
  keywords = {*Chromosome Breakage Chromosome Disorders/diagnosis/*genetics *Databases, Genetic Genes *Genetic Predisposition to Disease Humans PubMed Systems Integration},
  annote   = {Hoffmann, Robert Dopazo, Joaquin Cigudosa, Juan C Valencia, Alfonso Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2005 Jan 1;33(Database issue):D511-3.},
  abstract = {Recurrent chromosome aberrations are an important resource when associating human pathologies to specific genes. However, for technical reasons a large number of chromosome breakpoints are defined only at the level of cytobands and many of the genes involved remain unidentified. We developed a web-based information system that mines the scientific literature and generates textual and comprehensive information on all human breakpoints.
We show that the statistical analysis of this textual information and its combination with genomic data can identify genes directly involved in DNA rearrangements. The Human Chromosome Aberration Database (HCAD) is publicly accessible at http://www.pdg.cnb.uam.es/UniPub/HCAD/.},
}

@article{15591357,
  author   = {Santoyo, J. and Vaquerizas, J. M. and Dopazo, J.},
  title    = {Highly specific and accurate selection of {siRNAs} for high-throughput functional assays},
  journal  = {Bioinformatics},
  volume   = {21},
  number   = {8},
  pages    = {1376--82},
  year     = {2005},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15591357},
  keywords = {*Algorithms Base Sequence *Gene Silencing Molecular Sequence Data RNA, RNA/*methods *Software *User-Computer Interface, Small Interfering/*genetics Sequence Alignment/*methods Sequence Analysis},
  annote   = {Santoyo, Javier Vaquerizas, Juan M Dopazo, Joaquin Comparative Study Evaluation Studies Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2005 Apr 15;21(8):1376-82. Epub 2004 Dec 10.},
  abstract = {MOTIVATION: Small interfering RNA (siRNA) is widely used in functional genomics to silence genes by decreasing their expression to study the resulting phenotypes. The possibility of performing large-scale functional assays by gene silencing accentuates the necessity of a software capable of the high-throughput design of highly specific siRNA. The main objective sought was the design of a large number of siRNAs with appropriate thermodynamic properties and, especially, high specificity. Since all the available procedures require, to some extent, manual processing of the results to guarantee specific results, specificity constitutes to date, the major obstacle to the complete automation of all the steps necessary for the selection of optimal candidate siRNAs. RESULT: Here, we present a program that for the first time completely automates the search for siRNAs.
In SiDE, the most complete set of rules for the selection of siRNA candidates (including G+C content, nucleotides at determined positions, thermodynamic properties, propensity to form internal hairpins, etc.) is implemented and moreover, specificity is achieved by a conceptually new method. After selecting possible siRNA candidates with the optimal functional properties, putative unspecific matches, which can cause cross-hybridization, are checked in databases containing a unique entry for each gene. These truly non-redundant databases are constructed from the genome annotations (Ensembl). Also intron/exon boundaries, presence of polymorphisms (single nucleotide polymorphisms) specificity for either gene or transcript, and other features can be selected to be considered in the design of siRNAs. AVAILABILITY: The program is available as a web server at http://side.bioinfo.cnio.es. The program was written under the GPL license. CONTACT: jdopazo@cnio.es.},
}

@article{11238068,
  author   = {Herrero, J. and Valencia, A. and Dopazo, J.},
  title    = {A hierarchical unsupervised growing neural network for clustering gene expression patterns},
  journal  = {Bioinformatics},
  volume   = {17},
  number   = {2},
  pages    = {126--36},
  year     = {2001},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=11238068},
  keywords = {*Algorithms Automatic Data Processing *Gene Expression Profiling *Neural Networks (Computer) *Oligonucleotide Array Sequence Analysis},
  annote   = {Herrero, J Valencia, A Dopazo, J Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2001 Feb;17(2):126-36.},
  abstract = {MOTIVATION: We describe a new approach to the analysis of gene expression data coming from DNA array experiments, using an unsupervised neural network. DNA array technologies allow monitoring thousands of genes rapidly and efficiently.
One of the interests of these studies is the search for correlated gene expression patterns, and this is usually achieved by clustering them. The Self-Organising Tree Algorithm, (SOTA) (Dopazo,J. and Carazo,J.M. (1997) J. Mol. Evol., 44, 226-233), is a neural network that grows adopting the topology of a binary tree. The result of the algorithm is a hierarchical cluster obtained with the accuracy and robustness of a neural network. RESULTS: SOTA clustering confers several advantages over classical hierarchical clustering methods. SOTA is a divisive method: the clustering process is performed from top to bottom, i.e. the highest hierarchical levels are resolved before going to the details of the lowest levels. The growing can be stopped at the desired hierarchical level. Moreover, a criterion to stop the growing of the tree, based on the approximate distribution of probability obtained by randomisation of the original data set, is provided. By means of this criterion, a statistical support for the definition of clusters is proposed. In addition, obtaining average gene expression patterns is a built-in feature of the algorithm. Different neurons defining the different hierarchical levels represent the averages of the gene expression patterns contained in the clusters. Since SOTA runtimes are approximately linear with the number of items to be classified, it is especially suitable for dealing with huge amounts of data. The method proposed is very general and applies to any data providing that they can be coded as a series of numbers and that a computable measure of similarity between data items can be used.
AVAILABILITY: A server running the program can be found at: http://bioinfo.cnio.es/sotarray.},
}