% 805: the exported month value was the bare year "2023", which is not a month
% and would be printed next to the year; it has been dropped.
@article{805,
  title = {Drug-target identification in {COVID-19} disease mechanisms using computational systems biology approaches},
  journal = {Front Immunol},
  volume = {14},
  year = {2024},
  pages = {1282859},
  abstract = {

INTRODUCTION: The COVID-19 Disease Map project is a large-scale community effort uniting 277 scientists from 130 Institutions around the globe. We use high-quality, mechanistic content describing SARS-CoV-2-host interactions and develop interoperable bioinformatic pipelines for novel target identification and drug repurposing.

METHODS: Extensive community work allowed an impressive step forward in building interfaces between Systems Biology tools and platforms. Our framework can link biomolecules from omics data analysis and computational modelling to dysregulated pathways in a cell-, tissue- or patient-specific manner. Drug repurposing using text mining and AI-assisted analysis identified potential drugs, chemicals and microRNAs that could target the identified key factors.

RESULTS: Results revealed drugs already tested for anti-COVID-19 efficacy, providing a mechanistic context for their mode of action, and drugs already in clinical trials for treating other diseases, never tested against COVID-19.

DISCUSSION: The key advance is that the proposed framework is versatile and expandable, offering a significant upgrade in the arsenal for virus-host interactions and other complex pathologies.

},
  keywords = {Computer Simulation, COVID-19, drug repositioning, Humans, SARS-CoV-2, Systems biology},
  issn = {1664-3224},
  doi = {10.3389/fimmu.2023.1282859},
  author = {Niarakis, Anna and Ostaszewski, Marek and Mazein, Alexander and Kuperstein, Inna and Kutmon, Martina and Gillespie, Marc E and Funahashi, Akira and Acencio, Marcio Luis and Hemedan, Ahmed and Aichem, Michael and Klein, Karsten and Czauderna, Tobias and Burtscher, Felicia and Yamada, Takahiro G and Hiki, Yusuke and Hiroi, Noriko F and Hu, Finterly and Pham, Nhung and Ehrhart, Friederike and Willighagen, Egon L and Valdeolivas, Alberto and Dugourd, Aur{\'e}lien and Messina, Francesco and Esteban-Medina, Marina and Pe{\~n}a-Chilet, Maria and Rian, Kinza and Soliman, Sylvain and Aghamiri, Sara Sadat and Puniya, Bhanwar Lal and Naldi, Aur{\'e}lien and Helikar, Tom{\'a}{\v s} and Singh, Vidisha and Fern{\'a}ndez, Marco Fari{\~n}as and Bermudez, Viviam and Tsirvouli, Eirini and Montagud, Arnau and No{\"e}l, Vincent and Ponce-de-Leon, Miguel and Maier, Dieter and Bauch, Angela and Gyori, Benjamin M and Bachman, John A and Luna, Augustin and Pi{\~n}ero, Janet and Furlong, Laura I and Balaur, Irina and Rougny, Adrien and Jarosz, Yohan and Overall, Rupert W and Phair, Robert and Perfetto, Livia and Matthews, Lisa and Rex, Devasahayam Arokia Balaya and Orlic-Milacic, Marija and Gomez, Luis Cristobal Monraz and De Meulder, Bertrand and Ravel, Jean Marie and Jassal, Bijay and Satagopam, Venkata and Wu, Guanming and Golebiewski, Martin and Gawron, Piotr and Calzone, Laurence and Beckmann, Jacques S and Evelo, Chris T and D'Eustachio, Peter and Schreiber, Falk and Saez-Rodriguez, Julio and Dopazo, Joaquin and Kuiper, Martin and Valencia, Alfonso and Wolkenhauer, Olaf and Kitano, Hiroaki and Barillot, Emmanuel and Auffray, Charles and Balling, Rudi and Schneider, Reinhard},
}

% 796: month is stored as the standard three-letter macro so the style can
% format and localise it.
@article{796,
  title = {Defective extracellular matrix remodeling in brown adipose tissue is associated with fibro-inflammation and reduced diet-induced thermogenesis},
  journal = {Cell Rep},
  volume = {42},
  year = {2023},
  month = jun,
  pages = {112640},
  abstract = {

The relevance of extracellular matrix (ECM) remodeling is reported in white adipose tissue (AT) and obesity-related dysfunctions, but little is known about the importance of ECM remodeling in brown AT (BAT) function. Here, we show that a time course of high-fat diet (HFD) feeding progressively impairs diet-induced thermogenesis concomitantly with the development of fibro-inflammation in BAT. Higher markers of fibro-inflammation are associated with lower cold-induced BAT activity in humans. Similarly, when mice are housed at thermoneutrality, inactivated BAT features fibro-inflammation. We validate the pathophysiological relevance of BAT ECM remodeling in response to temperature challenges and HFD using a model of a primary defect in the collagen turnover mediated by partial ablation of the Pepd prolidase. Pepd-heterozygous mice display exacerbated dysfunction and BAT fibro-inflammation at thermoneutrality and in HFD. Our findings show the relevance of ECM remodeling in BAT activation and provide a mechanism for BAT dysfunction in obesity.

},
  issn = {2211-1247},
  doi = {10.1016/j.celrep.2023.112640},
  author = {Pellegrinelli, Vanessa and Figueroa-Ju{\'a}rez, Elizabeth and Samuelson, Isabella and U-Din, Mueez and Rodriguez-Fdez, Sonia and Virtue, Samuel and Leggat, Jennifer and Cubuk, Cankut and Peirce, Vivian J and Niemi, Tarja and Campbell, Mark and Rodriguez-Cuenca, Sergio and Dopazo, Joaquin and Carobbio, Stefania and Virtanen, Kirsi A and Vidal-Puig, Antonio},
}

% 766: braces in the title protect names that must survive sentence-casing styles.
@article{766,
  title = {Detection of High Level of Co-Infection and the Emergence of Novel {SARS CoV-2} {Delta-Omicron} and {Omicron-Omicron} Recombinants in the Epidemiological Surveillance of {Andalusia}},
  journal = {Int J Mol Sci},
  volume = {24},
  year = {2023},
  month = jan,
  abstract = {

Recombination is an evolutionary strategy to quickly acquire new viral properties inherited from the parental lineages. The systematic survey of the SARS-CoV-2 genome sequences of the Andalusian genomic surveillance strategy has allowed the detection of an unexpectedly high number of co-infections, which constitute the ideal scenario for the emergence of new recombinants. Whole genome sequence of SARS-CoV-2 has been carried out as part of the genomic surveillance programme. Sample sources included the main hospitals in the Andalusia region. In addition to the increase of co-infections and known recombinants, three novel SARS-CoV-2 delta-omicron and omicron-omicron recombinant variants with two break points have been detected. Our observations document an epidemiological scenario in which co-infection and recombination are detected more frequently. Finally, we describe a family case in which co-infection is followed by the detection of a recombinant made from the two co-infecting variants. This increased number of recombinants raises the risk of emergence of recombinant variants with increased transmissibility and pathogenicity.

},
  issn = {1422-0067},
  doi = {10.3390/ijms24032419},
  author = {Perez-Florido, Javier and Casimiro-Soriguer, Carlos S and Ortuno, Francisco and Fernandez-Rueda, Jose L and Aguado, Andrea and Lara, Mar{\'\i}a and Riazzo, Cristina and Rodriguez-Iglesias, Manuel A and Camacho-Martinez, Pedro and Merino-Diaz, Laura and Pupo-Ledo, Inmaculada and de Salazar, Adolfo and Vi{\~n}uela, Laura and Fuentes, Ana and Chueca, Natalia and Garc{\'\i}a, Federico and Dopazo, Joaquin and Lepe, Jose A},
}

% 751: no volume or pages are recorded for this entry.
@article{751,
  title = {Discovering potential interactions between rare diseases and {COVID-19} by combining mechanistic models of viral infection with statistical modeling},
  journal = {Hum Mol Genet},
  year = {2022},
  month = jan,
  abstract = {

Recent studies have demonstrated a relevant role of the host genetics in the COVID-19 prognosis. Most of the 7000 rare diseases described to date have a genetic component, typically highly penetrant. However, this vast spectrum of genetic variability remains yet unexplored with respect to possible interactions with COVID-19. Here, a mathematical mechanistic model of the COVID-19 molecular disease mechanism has been used to detect potential interactions between rare disease genes and the COVID-19 infection process and downstream consequences. Out of the 2518 disease genes analyzed, causative of 3854 rare diseases, a total of 254 genes have a direct effect on the COVID-19 molecular disease mechanism and 207 have an indirect effect revealed by a significant strong correlation. This remarkable potential of interaction occurs for more than 300 rare diseases. Mechanistic modeling of COVID-19 disease map has allowed a holistic systematic analysis of the potential interactions between the loss of function in known rare disease genes and the pathological consequences of COVID-19 infection. The results identify links between disease genes and COVID-19 hallmarks and demonstrate the usefulness of the proposed approach for future preventive measures in some rare diseases.

},
  issn = {1460-2083},
  doi = {10.1093/hmg/ddac007},
  author = {L{\'o}pez-S{\'a}nchez, Macarena and Loucera, Carlos and Pe{\~n}a-Chilet, Maria and Dopazo, Joaquin},
}

% 731: page ranges use the double hyphen (en dash) expected by BibTeX styles.
@article{731,
  title = {De novo small deletion affecting transcription start site of short isoform of {AUTS2} gene in a patient with syndromic neurodevelopmental defects},
  journal = {Am J Med Genet A},
  volume = {185},
  year = {2021},
  month = mar,
  pages = {877--883},
  abstract = {

Disruption of the autism susceptibility candidate 2 (AUTS2) gene through genomic rearrangements, copy number variations (CNVs), and intragenic deletions and mutations, has been recurrently involved in syndromic forms of developmental delay and intellectual disability, known as AUTS2 syndrome. The AUTS2 gene plays an important role in regulation of neuronal migration, and when altered, associates with a variable phenotype from severely to mildly affected patients. The more severe phenotypes significantly correlate with the presence of defects affecting the C-terminus part of the gene. This article reports a new patient with a syndromic neurodevelopmental disorder, who presents a deletion of 30 nucleotides in the exon 9 of the AUTS2 gene. Importantly, this deletion includes the transcription start site for the AUTS2 short transcript isoform, which has an important role in brain development. Gene expression analysis of AUTS2 full-length and short isoforms revealed that the deletion found in this patient causes a remarkable reduction in the expression level, not only of the short isoform, but also of the full AUTS2 transcripts. This report adds more evidence for the role of mutated AUTS2 short transcripts in the development of a severe phenotype in the AUTS2 syndrome.

},
  keywords = {Child, Preschool, Cytoskeletal Proteins, Dwarfism, Exons, Gene Expression Regulation, Genetic Association Studies, Humans, Male, Neurodevelopmental Disorders, Protein Isoforms, RNA, Messenger, Sequence Deletion, Syndrome, Transcription Factors, Transcription Initiation Site, Transcription, Genetic},
  issn = {1552-4833},
  doi = {10.1002/ajmg.a.62017},
  author = {Martinez-Delgado, Beatriz and Lopez-Martin, Estrella and Lara-Herguedas, Juli{\'a}n and Monzon, Sara and Cuesta, Isabel and Juli{\'a}, Miguel and Aquino, Virginia and Rodriguez-Martin, Carlos and Damian, Alejandra and Gonzalo, Irene and Gomez-Mariano, Gema and Baladron, Beatriz and Cazorla, Rosario and Iglesias, Gema and Roman, Enriqueta and Ros, Purificacion and Tutor, Pablo and Mellor, Susana and Jimenez, Carlos and Cabrejas, Maria Jose and Gonzalez-Vioque, Emiliano and Alonso, Javier and Bermejo-S{\'a}nchez, Eva and Posada, Manuel},
}

% 743: the exported url field held two URLs glued together (landing page + PDF);
% only the landing page is kept. The issue number comes from the URL path
% (volume 9 / issue 21 / article 2833).
% NOTE(review): the exported month was the garbled string "Jan-11-2021"; it is
% read here as November 2021 (issue 21 of that year) - confirm against the publisher.
@article{743,
  title = {Deciphering Genomic Heterogeneity and the Internal Composition of Tumour Activities through a Hierarchical Factorisation Model},
  journal = {Mathematics},
  volume = {9},
  number = {21},
  year = {2021},
  month = nov,
  pages = {2833},
  doi = {10.3390/math9212833},
  url = {https://www.mdpi.com/2227-7390/9/21/2833},
  author = {Carbonell-Caballero, Jos{\'e} and L{\'o}pez-Qu{\'\i}lez, Antonio and Conesa, David and Dopazo, Joaquin},
}

@article{720,
  title = {A {DNA} damage repair gene-associated signature predicts responses of patients with advanced soft-tissue sarcoma to treatment with trabectedin},
  journal = {Mol Oncol},
  volume = {15},
  year = {2021},
  month = dec,
  pages = {3691--3705},
  abstract = {

Predictive biomarkers of trabectedin represent an unmet need in advanced soft-tissue sarcomas (STS). DNA damage repair (DDR) genes, involved in homologous recombination or nucleotide excision repair, had been previously described as biomarkers of trabectedin resistance or sensitivity, respectively. The majority of these studies only focused on specific factors (ERCC1, ERCC5, and BRCA1) and did not evaluate several other DDR-related genes that could have a relevant role for trabectedin efficacy. In this retrospective translational study, 118 genes involved in DDR were evaluated to determine, by transcriptomics, a predictive gene signature of trabectedin efficacy. A six-gene predictive signature of trabectedin efficacy was built in a series of 139 tumor samples from patients with advanced STS. Patients in the high-risk gene signature group showed a significantly worse progression-free survival compared with patients in the low-risk group (2.1 vs 6.0 months, respectively). Differential gene expression analysis defined new potential predictive biomarkers of trabectedin sensitivity (PARP3 and CCNH) or resistance (DNAJB11 and PARP1). Our study identified a new gene signature that significantly predicts patients with higher probability to respond to treatment with trabectedin. Targeting some genes of this signature emerges as a potential strategy to enhance trabectedin efficacy.

},
  issn = {1878-0261},
  doi = {10.1002/1878-0261.12996},
  author = {Moura, David S and Pe{\~n}a-Chilet, Maria and Cordero Varela, Juan Antonio and Alvarez-Alegret, Ramiro and Agra-Pujol, Carolina and Izquierdo, Francisco and Ramos, Rafael and Ortega-Medina, Luis and Martin-Davila, Francisco and Castilla-Ramirez, Carolina and Hernandez-Leon, Carmen Nieves and Romagosa, Cleofe and Vaz Salgado, Maria Angeles and Lavernia, Javier and Bagu{\'e}, Silvia and Mayodormo-Aranda, Empar and Vicioso, Luis and Hern{\'a}ndez Barcel{\'o}, Jose Emilio and Rubio-Casadevall, Jordi and de Juan, Ana and Fia{\~n}o-Valverde, Maria Concepcion and Hindi, Nadia and Lopez-Alvarez, Maria and Lacerenza, Serena and Dopazo, Joaquin and Gutierrez, Antonio and Alvarez, Rosa and Valverde, Claudia and Martinez-Trufero, Javier and Martin-Broto, Javier},
}

@article{728,
  title = {{DOME}: recommendations for supervised machine learning validation in biology},
  journal = {Nat Methods},
  volume = {18},
  year = {2021},
  month = oct,
  pages = {1122--1127},
  keywords = {Algorithms, Computational Biology, Guidelines as Topic, Humans, Models, Biological, Research Design, Supervised Machine Learning},
  issn = {1548-7105},
  doi = {10.1038/s41592-021-01205-4},
  author = {Walsh, Ian and Fishman, Dmytro and Garcia-Gasulla, Dario and Titma, Tiina and Pollastri, Gianluca and Harrow, Jennifer and Psomopoulos, Fotis E and Tosatto, Silvio C E},
}

@article{713,
  title = {Drug repurposing for {COVID-19} using machine learning and mechanistic models of signal transduction circuits related to {SARS-CoV-2} infection},
  journal = {Signal Transduct Target Ther},
  volume = {5},
  year = {2020},
  month = dec,
  pages = {290},
  keywords = {Computational Chemistry, COVID-19, drug repositioning, Humans, Machine Learning, Molecular Docking Simulation, Molecular Targeted Therapy, Proteins, SARS-CoV-2, Signal Transduction},
  issn = {2059-3635},
  doi = {10.1038/s41392-020-00417-y},
  author = {Loucera, Carlos and Esteban-Medina, Marina and Rian, Kinza and Falco, Matias M and Dopazo, Joaquin and Pe{\~n}a-Chilet, Maria},
}

% 422: the exported month value was the bare year "2019" (not a month); dropped.
@article{422,
  title = {Differential metabolic activity and discovery of therapeutic targets using summarized metabolic pathway models},
  journal = {NPJ Syst Biol Appl},
  volume = {5},
  year = {2019},
  pages = {7},
  abstract = {

In spite of the increasing availability of genomic and transcriptomic data, there is still a gap between the detection of perturbations in gene expression and the understanding of their contribution to the molecular mechanisms that ultimately account for the phenotype studied. Alterations in the metabolism are behind the initiation and progression of many diseases, including cancer. The wealth of available knowledge on metabolic processes can therefore be used to derive mechanistic models that link gene expression perturbations to changes in metabolic activity that provide relevant clues on molecular mechanisms of disease and drug modes of action (MoA). In particular, pathway modules, which recapitulate the main aspects of metabolism, are especially suitable for this type of modeling. We present Metabolizer, a web-based application that offers an intuitive, easy-to-use interactive interface to analyze differences in pathway metabolic module activities that can also be used for class prediction and in silico prediction of knock-out (KO) effects. Moreover, Metabolizer can automatically predict the optimal KO intervention for restoring a diseased phenotype. We provide different types of validations of some of the predictions made by Metabolizer. Metabolizer is a web tool that allows understanding molecular mechanisms of disease or the MoA of drugs within the context of the metabolism by using gene expression measurements. In addition, this tool automatically suggests potential therapeutic targets for individualized therapeutic interventions.

},
  keywords = {Computational Biology, Computer Simulation, Drug discovery, Gene Regulatory Networks, Humans, Internet, Metabolic Networks and Pathways, Models, Biological, Neoplasms, Phenotype, Software, Transcriptome},
  issn = {2056-7189},
  doi = {10.1038/s41540-019-0087-2},
  author = {Cubuk, Cankut and Hidalgo, Marta R and Amadoz, Alicia and Rian, Kinza and Salavert, Francisco and Pujana, Miguel A and Mateo, Francesca and Herranz, Carmen and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin},
}

% 436: month as a macro, page range with a double hyphen.
@article{436,
  title = {Dysfunctional mitochondrial fission impairs cell reprogramming},
  journal = {Cell Cycle},
  volume = {15},
  year = {2016},
  month = dec,
  pages = {3240--3250},
  abstract = {

We have recently shown that mitochondrial fission is induced early in reprogramming in a Drp1-dependent manner; however, the identity of the factors controlling Drp1 recruitment to mitochondria was unexplored. To investigate this, we used a panel of RNAi targeting factors involved in the regulation of mitochondrial dynamics and we observed that MiD51, Gdap1 and, to a lesser extent, Mff were found to play key roles in this process. Cells derived from Gdap1-null mice were used to further explore the role of this factor in cell reprogramming. Microarray data revealed a prominent down-regulation of cell cycle pathways in Gdap1-null cells early in reprogramming and cell cycle profiling uncovered a G2/M growth arrest in Gdap1-null cells undergoing reprogramming. High-Content analysis showed that this growth arrest was DNA damage-independent. We propose that lack of efficient mitochondrial fission impairs cell reprogramming by interfering with cell cycle progression in a DNA damage-independent manner.

},
  keywords = {Animals, Cell Cycle Checkpoints, Cellular Reprogramming, DNA Damage, G2 Phase, Gene Knockdown Techniques, Mice, Mitochondrial Dynamics, Mitosis, Nerve Tissue Proteins, Pluripotent Stem Cells, Transcription Factors},
  issn = {1551-4005},
  doi = {10.1080/15384101.2016.1241930},
  author = {Prieto, Javier and Le{\'o}n, Marian and Ponsoda, Xavier and Garcia-Garcia, Francisco and Bort, Roque and Serna, Eva and Barneo-Mu{\~n}oz, Manuela and Palau, Francesc and Dopazo, Joaquin and L{\'o}pez-Garc{\'\i}a, Carlos and Torres, Josema},
}

% 458: the abbreviated page range "52-7" is written out in full.
@article{458,
  title = {Deregulation of key signaling pathways involved in oocyte maturation in {FMR1} premutation carriers with {Fragile X}-associated primary ovarian insufficiency},
  journal = {Gene},
  volume = {571},
  year = {2015},
  month = oct,
  pages = {52--57},
  abstract = {

FMR1 premutation female carriers are at risk for Fragile X-associated primary ovarian insufficiency (FXPOI). Insights from knock-in mouse model have recently demonstrated that FXPOI is due to an increased rate of follicle depletion or an impaired development of the growing follicles. Molecular mechanisms responsible for this reduced viability are still unknown. In an attempt to provide new data on the mechanisms that lead to FXPOI, we report the first investigation involving transcription profiling of total blood from FMR1 premutation female carriers with and without FXPOI. A total of 16 unrelated female individuals (6 FMR1 premutated females with FXPOI; 6 FMR1 premutated females without FXPOI; and 4 no-FXPOI females) were studied by whole human genome oligonucleotide microarray (Agilent Technologies). Fold change analysis did not show any genes with significant differential gene expression. However, functional profiling by gene set analysis showed large number of statistically significant deregulated GO annotations as well as numerous KEGG pathways in FXPOI females. These results suggest that the impairment of fertility in these females might be due to a generalized deregulation of key signaling pathways involved in oocyte maturation. In particular, the vasoendotelial growth factor signaling, the inositol phosphate metabolism, the cell cycle, and the MAPK signaling pathways were found to be down-regulated in FXPOI females. Furthermore, a high statistical enrichment of biological processes involved in cell death and survival were found deregulated among FXPOI females. Our results provide new strategic approaches to further investigate the molecular mechanisms and potential therapeutic targets for FXPOI not focused in a single gene but rather in the set of genes involved in these pathways.

},
  keywords = {Adult, Aged, Female, Fragile X Mental Retardation Protein, Fragile X Syndrome, Gene Expression Profiling, Gene Expression Regulation, Developmental, Gene ontology, Genome-Wide Association Study, Heterozygote, Humans, Middle Aged, Models, Genetic, mutation, Oligonucleotide Array Sequence Analysis, Oocytes, Primary Ovarian Insufficiency, Signal Transduction},
  issn = {1879-0038},
  doi = {10.1016/j.gene.2015.06.039},
  author = {Alvarez-Mora, M I and Rodriguez-Revenga, L and Madrigal, I and Garc{\'\i}a-Garc{\'\i}a, F and Duran, M and Dopazo, J and Estivill, X and Mil{\`a}, M},
}

% 448: no volume or pages are recorded for this entry.
@article{448,
  title = {Differential Features Between Chronic Skin Inflammatory Diseases Revealed in Skin-Humanized Psoriasis and Atopic Dermatitis Mouse Models},
  journal = {J Invest Dermatol},
  year = {2015},
  month = sep,
  abstract = {

Psoriasis (PS) and atopic dermatitis (AD) are chronic and relapsing inflammatory diseases of the skin affecting a large number of patients worldwide. Psoriasis is characterized by a Th1/Th17 immunological response whereas acute AD lesions exhibit Th2-dominant inflammation. Current single gene and signaling pathways-based models of inflammatory skin diseases are incomplete. Previous work allowed us to model psoriasis in skin-humanized mice through proper combinations of inflammatory cell components and disruption of barrier function. Herein we describe and characterize an animal model for AD using similar bioengineered-based approaches, by intradermal injection of human Th2 lymphocytes in regenerated human skin after partial removal of stratum corneum. In the present work we have extensively compared this model with the previous and an improved version of the PS model, in which Th17/Th1 lymphocytes replace exogenous cytokines. Comparative expression analyses revealed marked differences in specific epidermal proliferation and differentiation markers and immune-related molecules including antimicrobial peptides. Likewise, the composition of the dermal inflammatory infiltrate presented important differences. Availability of accurate and reliable animal models for these diseases will contribute to the understanding of the pathogenesis and provide valuable tools for drug development and testing. Journal of Investigative Dermatology accepted article preview online, 23 September 2015. doi:10.1038/jid.2015.362.

},
  issn = {1523-1747},
  doi = {10.1038/jid.2015.362},
  author = {Carretero, M and Guerrero-Aspizua, S and Illera, N and Galvez, V and Navarro, M and Garc{\'\i}a-Garc{\'\i}a, F and Dopazo, J and Jorcano, J L and Larcher, F and Del Rio, M},
}

% 486: the abbreviated page range "124-33" is written out in full.
@article{486,
  title = {Deciphering intrafamilial phenotypic variability by exome sequencing in a {Bardet-Biedl} family},
  journal = {Mol Genet Genomic Med},
  volume = {2},
  year = {2014},
  month = mar,
  pages = {124--133},
  abstract = {

Bardet-Biedl syndrome (BBS) is a model ciliopathy characterized by a wide range of clinical variability. The heterogeneity of this condition is reflected in the number of underlying gene defects and the epistatic interactions between the proteins encoded. BBS is generally inherited in an autosomal recessive trait. However, in some families, mutations across different loci interact to modulate the expressivity of the phenotype. In order to investigate the magnitude of epistasis in one BBS family with remarkable intrafamilial phenotypic variability, we designed an exome sequencing-based approach using SOLID 5500xl platform. This strategy allowed the reliable detection of the primary causal mutations in our family consisting of two novel compound heterozygous mutations in McKusick-Kaufman syndrome (MKKS) gene (p.D90G and p.V396F). Additionally, exome sequencing enabled the detection of one novel heterozygous NPHP4 variant which is predicted to activate a cryptic acceptor splice site and is only present in the most severely affected patient. Here, we provide an exome sequencing analysis of a BBS family and show the potential utility of this tool, in combination with network analysis, to detect disease-causing mutations and second-site modifiers. Our data demonstrate how next-generation sequencing (NGS) can facilitate the dissection of epistatic phenomena, and shed light on the genetic basis of phenotypic variability.

},
  issn = {2324-9269},
  doi = {10.1002/mgg3.50},
  author = {Gonz{\'a}lez-del Pozo, Mar{\'\i}a and M{\'e}ndez-Vidal, Cristina and Santoyo-L{\'o}pez, Javier and Vela-Boza, Alicia and Bravo-Gil, Nereida and Rueda, Antonio and Garc{\'\i}a-Alonso, Luz and V{\'a}zquez-Marouschek, Carmen and Dopazo, Joaquin and Borrego, Salud and Anti{\v n}olo, Guillermo},
}

% 1035 describes the same work as entry 486 (identical DOI 10.1002/mgg3.50).
% Both keys are kept so that existing citations of either one keep resolving;
% cite only one of them in new documents to avoid a doubled reference.
% All author names use the unambiguous "Last, First" form.
@article{1035,
  title = {Deciphering intrafamilial phenotypic variability by exome sequencing in a {Bardet{\textendash}Biedl} family},
  journal = {Molecular Genetics \& Genomic Medicine},
  volume = {2},
  number = {2},
  year = {2014},
  pages = {124--133},
  abstract = {Bardet{\textendash}Biedl syndrome (BBS) is a model ciliopathy characterized by a wide range of clinical variability. The heterogeneity of this condition is reflected in the number of underlying gene defects and the epistatic interactions between the proteins encoded. BBS is generally inherited in an autosomal recessive trait. However, in some families, mutations across different loci interact to modulate the expressivity of the phenotype. In order to investigate the magnitude of epistasis in one BBS family with remarkable intrafamilial phenotypic variability, we designed an exome sequencing{\textendash}based approach using SOLID 5500xl platform. This strategy allowed the reliable detection of the primary causal mutations in our family consisting of two novel compound heterozygous mutations in McKusick{\textendash}Kaufman syndrome (MKKS) gene (p.D90G and p.V396F). Additionally, exome sequencing enabled the detection of one novel heterozygous NPHP4 variant which is predicted to activate a cryptic acceptor splice site and is only present in the most severely affected patient. Here, we provide an exome sequencing analysis of a BBS family and show the potential utility of this tool, in combination with network analysis, to detect disease-causing mutations and second-site modifiers. Our data demonstrate how next-generation sequencing (NGS) can facilitate the dissection of epistatic phenomena, and shed light on the genetic basis of phenotypic variability.},
  doi = {10.1002/mgg3.50},
  url = {http://onlinelibrary.wiley.com/doi/10.1002/mgg3.50/full},
  author = {Gonz{\'a}lez-del Pozo, Mar{\'\i}a and M{\'e}ndez-Vidal, Cristina and Santoyo-L{\'o}pez, Javier and Vela-Boza, Alicia and Bravo-Gil, Nereida and Rueda, Antonio and Garc{\'\i}a-Alonso, Luz and V{\'a}zquez-Marouschek, Carmen and Dopazo, Joaqu{\'\i}n and Borrego, Salud and Anti{\v n}olo, Guillermo},
}

% 503: the exported month value was the bare year "2013" (not a month); dropped.
@article{503,
  title = {Defining the genomic signature of totipotency and pluripotency during early human development},
  journal = {PLoS One},
  volume = {8},
  year = {2013},
  pages = {e62135},
  abstract = {

The genetic mechanisms governing human pre-implantation embryo development and the in vitro counterparts, human embryonic stem cells (hESCs), still remain incomplete. Previous global genome studies demonstrated that totipotent blastomeres from day-3 human embryos and pluripotent inner cell masses (ICMs) from blastocysts, display unique and differing transcriptomes. Nevertheless, comparative gene expression analysis has revealed that no significant differences exist between hESCs derived from blastomeres versus those obtained from ICMs, suggesting that pluripotent hESCs involve a new developmental progression. To understand early human stages evolution, we developed an undifferentiation network signature (UNS) and applied it to a differential gene expression profile between single blastomeres from day-3 embryos, ICMs and hESCs. This allowed us to establish a unique signature composed of highly interconnected genes characteristic of totipotency (61 genes), in vivo pluripotency (20 genes), and in vitro pluripotency (107 genes), and which are also proprietary according to functional analysis. This systems biology approach has led to an improved understanding of the molecular and signaling processes governing human pre-implantation embryo development, as well as enabling us to comprehend how hESCs might adapt to in vitro culture conditions.

},
  keywords = {Blastocyst Inner Cell Mass, Blastomeres, Cell Differentiation, Embryonic Development, Embryonic Stem Cells, Gene Expression Profiling, Gene Regulatory Networks, Genome, Human, Humans, Molecular Sequence Annotation, Pluripotent Stem Cells, Totipotent Stem Cells},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0062135},
  author = {Galan, Amparo and Diaz-Gimeno, Patricia and Poo, Maria Eugenia and Valbuena, Diana and Sanchez, Eva and Ruiz, Veronica and Dopazo, Joaquin and Montaner, David and Conesa, Ana and Simon, Carlos},
}

% 504: the issue spans April-June, expressed by concatenating month macros;
% the abbreviated page range "337-50" is written out in full.
@article{504,
  title = {Differential gene-expression analysis defines a molecular pattern related to olive pollen allergy},
  journal = {J Biol Regul Homeost Agents},
  volume = {27},
  year = {2013},
  month = apr # "--" # jun,
  pages = {337--350},
  abstract = {

Analysis of gene-expression profiles by microarrays is useful for characterization of candidate genes, key regulatory networks, and to define phenotypes or molecular signatures which improve the diagnosis and/or classification of the allergic processes. We have used this approach in the study of olive pollen response in order to find differential molecular markers among responders and non-responders to this allergenic source. Five clinical groups, non-allergic, asymptomatic, allergic but not to olive pollen, untreated-olive-pollen allergic patients and olive-pollen allergic patients (under specific-immunotherapy), were assessed during and outside pollen seasons. Whole-genome gene expression analysis was performed in RNAs extracted from PBMCs. After assessment of data quality and principal components analysis (PCA), differential gene-expression, by multiple testing and, functional analyses by KEGG, for pathways and Gene-Ontology for biological processes were performed. Relevance was defined by fold change and corrected P values (less than 0.05). The most differential genes were validated by qRT-PCR in a larger set of individuals. Interestingly, gene-expression profiling obtained by PCA clearly showed five clusters of samples that correlated with the five clinical groups. Furthermore, differential gene expression and functional analyses revealed differential genes and pathways in the five clinical groups. The 93 most significant genes found were validated, and one set of 35 genes was able to discriminate profiles of olive pollen response. Our results, in addition to providing new information on allergic response, define a possible molecular signature for olive pollen allergy which could be useful for the diagnosis and treatment of this and other sensitizations.

}, keywords = {Adult, Female, Gene Expression Profiling, Humans, Male, Middle Aged, Olea, Principal Component Analysis, Rhinitis, Allergic, Seasonal}, issn = {0393-974X}, author = {Aguerri, M and Calzada, D and Montaner, D and Mata, M and Florido, F and Quiralte, J and Dopazo, J and Lahoz, C and Cardaba, B} } @inbook {954, title = {Docencia en Estad{\'\i}stica: Experiencias de Innovaci{\'o}n}, booktitle = {III Jornadas de Intercambio de Experiencias de Innovaci{\'o}n Educativa en Estad{\'\i}stica}, volume = {1}, year = {2013}, pages = {201-210}, isbn = {978-84-9858-872-9}, author = {Garcia-Garcia, Francisco and Montaner, David} } @article {931, title = {Development, Characterization and Experimental Validation of a Cultivated Sunflower (Helianthus annuus L.) Gene Expression Oligonucleotide Microarray.}, journal = {PloS one}, volume = {7}, year = {2012}, month = {2012}, pages = {e45899}, abstract = {Oligonucleotide-based microarrays with accurate gene coverage represent a key strategy for transcriptional studies in orphan species such as sunflower, H. annuus L., which lacks full genome sequences. The goal of this study was the development and functional annotation of a comprehensive sunflower unigene collection and the design and validation of a custom sunflower oligonucleotide-based microarray. A large scale EST (>130,000 ESTs) curation, assembly and sequence annotation was performed using Blast2GO (www.blast2go.de). The EST assembly comprises 41,013 putative transcripts (12,924 contigs and 28,089 singletons). The resulting Sunflower Unigen Resource (SUR version 1.0) was used to design an oligonucleotide-based Agilent microarray for cultivated sunflower. This microarray includes a total of 42,326 features: 1,417 Agilent controls, 74 control probes for sunflower replicated 10 times (740 controls) and 40,169 different non-control probes. Microarray performance was validated using a model experiment examining the induction of senescence by water deficit. 
Pre-processing and differential expression analysis of Agilent microarrays was performed using the Bioconductor limma package. The analyses based on p-values calculated by eBayes (p<0.01) allowed the detection of 558 differentially expressed genes between water stress and control conditions; from these, ten genes were further validated by qPCR. Over-represented ontologies were identified using FatiScan in the Babelomics suite. This work generated a curated and trustable sunflower unigene collection, and a custom, validated sunflower oligonucleotide-based microarray using Agilent technology. Both the curated unigene collection and the validated oligonucleotide microarray provide key resources for sunflower genome analysis, transcriptional studies, and molecular breeding for crop improvement.}, issn = {1932-6203}, doi = {10.1371/journal.pone.0045899}, url = {http://www.plosone.org/article/info\%3Adoi\%2F10.1371\%2Fjournal.pone.0045899}, author = {Fernandez, Paula and Soria, Marcelo and Blesa, David and Dirienzo, Julio and Moschen, Sebasti{\'a}n and Rivarola, M{\'a}ximo and Clavijo, Bernardo Jose and Gonzalez, Sergio and Peluffo, Lucila and Pr{\'\i}ncipi, Dario and Dosio, Guillermo and Aguirrezabal, Luis and Garcia-Garcia, Francisco and Ana Conesa and Hopp, Esteban and Joaqu{\'\i}n Dopazo and Heinz, Ruth Amelia and Paniego, Norma} } @article {512, title = {Discovering the hidden sub-network component in a ranked list of genes or proteins derived from genomic experiments.}, journal = {Nucleic Acids Res}, volume = {40}, year = {2012}, month = {2012 Nov 01}, pages = {e158}, abstract = {

Genomic experiments (e.g. differential gene expression, single-nucleotide polymorphism association) typically produce ranked list of genes. We present a simple but powerful approach which uses protein-protein interaction data to detect sub-networks within such ranked lists of genes or proteins. We performed an exhaustive study of network parameters that allowed us concluding that the average number of components and the average number of nodes per component are the parameters that best discriminate between real and random networks. A novel aspect that increases the efficiency of this strategy in finding sub-networks is that, in addition to direct connections, also connections mediated by intermediate nodes are considered to build up the sub-networks. The possibility of using of such intermediate nodes makes this approach more robust to noise. It also overcomes some limitations intrinsic to experimental designs based on differential expression, in which some nodes are invariant across conditions. The proposed approach can also be used for candidate disease-gene prioritization. Here, we demonstrate the usefulness of the approach by means of several case examples that include a differential expression analysis in Fanconi Anemia, a genome-wide association study of bipolar disorder and a genome-scale study of essentiality in cancer genes. An efficient and easy-to-use web interface (available at http://www.babelomics.org) based on HTML5 technologies is also provided to run the algorithm and represent the network.

}, keywords = {Bipolar Disorder, Fanconi Anemia, Gene Regulatory Networks, Genes, Neoplasm, Genome-Wide Association Study, Genomics, Humans, Protein Interaction Mapping}, issn = {1362-4962}, doi = {10.1093/nar/gks699}, author = {Garc{\'\i}a-Alonso, Luz and Alonso, Roberto and Vidal, Enrique and Amadoz, Alicia and De Maria, Alejandro and Minguez, Pablo and Medina, Ignacio and Dopazo, Joaquin} } @article {513, title = {Diversification of the expanded teleost-specific toll-like receptor family in Atlantic cod, Gadus morhua.}, journal = {BMC Evol Biol}, volume = {12}, year = {2012}, month = {2012 Dec 29}, pages = {256}, abstract = {

BACKGROUND: Toll-like receptors (Tlrs) are major molecular pattern recognition receptors of the innate immune system. Atlantic cod (Gadus morhua) is the first vertebrate known to have lost most of the mammalian Tlr orthologues, particularly all bacterial recognising and other cell surface Tlrs. On the other hand, its genome encodes a unique repertoire of teleost-specific Tlrs. The aim of this study was to investigate if these duplicate Tlrs have been retained through adaptive evolution to compensate for the lack of other cell surface Tlrs in the cod genome.

RESULTS: In this study, one tlr21, 12 tlr22 and two tlr23 genes representing the teleost-specific Tlr family have been cloned and characterised in cod. Phylogenetic analysis grouped all tlr22 genes under a single clade, indicating that the multiple cod paralogues have arisen through lineage-specific duplications. All tlrs examined were transcribed in immune-related tissues as well as in stomach, gut and gonads of adult cod and were differentially expressed during early development. These tlrs were also differentially regulated following immune challenge by immersion with Vibrio anguillarum, indicating their role in the immune response. An increase in water temperature from 4 to 12{\textdegree}C was associated with a 5.5-fold down-regulation of tlr22d transcript levels in spleen. Maximum likelihood analysis with different evolution models revealed that tlr22 genes are under positive selection. A total of 24 codons were found to be positively selected, of which 19 are in the ligand binding region of ectodomain.

CONCLUSION: Positive selection pressure coupled with experimental evidence of differential expression strongly support the hypothesis that teleost-specific tlr paralogues in cod are undergoing neofunctionalisation and can recognise bacterial pathogen-associated molecular patterns to compensate for the lack of other cell surface Tlrs.

}, keywords = {Amino Acid Sequence, Animals, Binding Sites, Evolution, Molecular, Fish Diseases, Fish Proteins, Gadus morhua, Gene Expression Profiling, Genetic Variation, Gills, Head Kidney, Host-Pathogen Interactions, Models, Molecular, Molecular Sequence Data, Multigene Family, Phylogeny, Protein Structure, Tertiary, Reverse Transcriptase Polymerase Chain Reaction, Selection, Genetic, Sequence Analysis, DNA, Sequence Homology, Amino Acid, Temperature, Toll-Like Receptors, Vibrio}, issn = {1471-2148}, doi = {10.1186/1471-2148-12-256}, author = {Sundaram, Arvind Y M and Kiron, Viswanath and Dopazo, Joaquin and Fernandes, Jorge M O} } @article {529, title = {Differential expression in RNA-seq: a matter of depth.}, journal = {Genome Res}, volume = {21}, year = {2011}, month = {2011 Dec}, pages = {2213-23}, abstract = {

Next-generation sequencing (NGS) technologies are revolutionizing genome research, and in particular, their application to transcriptomics (RNA-seq) is increasingly being used for gene expression profiling as a replacement for microarrays. However, the properties of RNA-seq data have not been yet fully established, and additional research is needed for understanding how these data respond to differential expression analysis. In this work, we set out to gain insights into the characteristics of RNA-seq data analysis by studying an important parameter of this technology: the sequencing depth. We have analyzed how sequencing depth affects the detection of transcripts and their identification as differentially expressed, looking at aspects such as transcript biotype, length, expression level, and fold-change. We have evaluated different algorithms available for the analysis of RNA-seq and proposed a novel approach--NOISeq--that differs from existing methods in that it is data-adaptive and nonparametric. Our results reveal that most existing methodologies suffer from a strong dependency on sequencing depth for their differential expression calls and that this results in a considerable number of false positives that increases as the number of reads grows. In contrast, our proposed method models the noise distribution from the actual data, can therefore better adapt to the size of the data set, and is more effective in controlling the rate of false discoveries. This work discusses the true potential of RNA-seq for studying regulation at low expression ranges, the noise within RNA-seq data, and the issue of replication.

}, keywords = {Algorithms, Expressed Sequence Tags, Gene Expression Profiling, Gene Expression Regulation, Humans, Models, Genetic, Oligonucleotide Array Sequence Analysis}, issn = {1549-5469}, doi = {10.1101/gr.124321.111}, author = {Tarazona, Sonia and Garc{\'\i}a-Alcalde, Fernando and Dopazo, Joaquin and Ferrer, Alberto and Conesa, Ana} } @article {21266330, title = {Differential Lipid Partitioning Between Adipocytes and Tissue Macrophages Modulates Macrophage Lipotoxicity and M2/M1 Polarization in Obese Mice.}, journal = {Diabetes}, volume = {60}, number = {3}, year = {2011}, month = {2011 Jan 24}, pages = {797-809}, abstract = {

OBJECTIVE Obesity-associated insulin resistance is characterized by a state of chronic, low-grade inflammation that is associated with the accumulation of M1 proinflammatory macrophages in adipose tissue. Although different evidence explains the mechanisms linking the expansion of adipose tissue and adipose tissue macrophage (ATM) polarization, in the current study we investigated the concept of lipid-induced toxicity as the pathogenic link that could explain the trigger of this response. RESEARCH DESIGN AND METHODS We addressed this question using isolated ATMs and adipocytes from genetic and diet-induced murine models of obesity. Through transcriptomic and lipidomic analysis, we created a model integrating transcript and lipid species networks simultaneously occurring in adipocytes and ATMs and their reversibility by thiazolidinedione treatment. RESULTS We show that polarization of ATMs is associated with lipid accumulation and the consequent formation of foam cell-like cells in adipose tissue. Our study reveals that early stages of adipose tissue expansion are characterized by M2-polarized ATMs and that progressive lipid accumulation within ATMs heralds the M1 polarization, a macrophage phenotype associated with severe obesity and insulin resistance. Furthermore, rosiglitazone treatment, which promotes redistribution of lipids toward adipocytes and extends the M2 ATM polarization state, prevents the lipid alterations associated with M1 ATM polarization. CONCLUSIONS Our data indicate that the M1 ATM polarization in obesity might be a macrophage-specific manifestation of a more general lipotoxic pathogenic mechanism. This indicates that strategies to optimize fat deposition and repartitioning toward adipocytes might improve insulin sensitivity by preventing ATM lipotoxicity and M1 polarization.

}, author = {Prieur, Xavier and Mok, Crystal Y L and Velagapudi, Vidya R and N{\'u}{\~n}ez, Vanessa and Fuentes, Luc{\'\i}a and Montaner, David and Ishikawa, Ko and Camacho, Alberto and Barbarroja, Nuria and O{\textquoteright}Rahilly, Stephen and Sethi, Jaswinder and Dopazo, Joaquin and Oresic, Matej and Ricote, Mercedes and Vidal-Puig, Antonio} } @article {22039362, title = {Discovery of an ebolavirus-like filovirus in Europe.}, journal = {PLoS pathogens}, volume = {7}, year = {2011}, month = {2011 Oct}, pages = {e1002304}, abstract = {

Filoviruses, amongst the most lethal of primate pathogens, have only been reported as natural infections in sub-Saharan Africa and the Philippines. Infections of bats with the ebolaviruses and marburgviruses do not appear to be associated with disease. Here we report identification in dead insectivorous bats of a genetically distinct filovirus, provisionally named Lloviu virus, after the site of detection, Cueva del Lloviu, in Spain.

}, author = {Negredo, Ana and Palacios, Gustavo and V{\'a}zquez-Mor{\'o}n, Sonia and Gonz{\'a}lez, F{\'e}lix and Dopazo, Hern{\'a}n and Molero, Francisca and Juste, Javier and Quetglas, Juan and Savji, Nazir and de la Cruz Mart{\'\i}nez, Maria and Herrera, Jesus Enrique and Pizarro, Manuel and Hutchison, Stephen K and Echevarr{\'\i}a, Juan E and Lipkin, W Ian and Tenorio, Antonio} } @article {22112448, title = {Does singlet oxygen activate cell death in Arabidopsis cell suspension cultures? Analysis of the early transcriptional defence responses to high light stress.}, journal = {Plant signaling \& behavior}, volume = {6}, year = {2011}, month = {2011 Dec 1}, abstract = {

Can Arabidopsis cell suspension cultures (ACSC) provide a useful working model to investigate genetically-controlled defence responses with signalling cascades starting in chloroplasts? In order to provide a convincing answer, we analysed the early transcriptional profile of Arabidopsis cells at high light (HL). The results showed that ACSC respond to HL in a manner that resembles the singlet oxygen ($^{1}$O$_{2}$)-mediated defence responses described for the conditional fluorescent (flu) mutant of Arabidopsis thaliana. The flu mutant is characterized by the accumulation of free protochlorophyllide (Pchlide) in plastids when put into darkness and the subsequent production of $^{1}$O$_{2}$ when the light is on. In ACSC, $^{1}$O$_{2}$ is produced in chloroplasts at HL when excess excitation energy flows into photosystem II (PSII). Other reactive oxygen species are also produced in ACSC at HL, but to a lesser extent. When the HL stress ceases, ACSC recovers the initial rate of oxygen evolution and cell growth continues. We can conclude that chloroplasts of ACSC are both photosynthetically active and capable of initiating $^{1}$O$_{2}$-mediated signalling cascades that activate a broad range of genetically-controlled defence responses. The up-regulation of transcripts associated with the biosynthesis and signalling pathways of OPDA (12-oxophytodienoic acid) and ethylene (ET) suggests that the activated defence responses at HL are governed by these two hormones. In contrast to the flu mutant, the $^{1}$O$_{2}$-mediated defence responses were independent of the up-regulation of EDS1 (enhanced disease susceptibility) required for the accumulation of salicylic acid (SA) and genetically-controlled cell death.

}, author = {Guti{\'e}rrez, Jorge and Gonz{\'a}lez-P{\'e}rez, Sergio and Garcia-Garcia, Francisco and Lorenzo, Oscar and Arellano, Juan B} } @article {547, title = {DNA methylation epigenotypes in breast cancer molecular subtypes.}, journal = {Breast Cancer Res}, volume = {12}, year = {2010}, month = {2010}, pages = {R77}, abstract = {

INTRODUCTION: Identification of gene expression based breast cancer subtypes is considered as a critical means of prognostication. Genetic mutations along with epigenetic alterations contribute to gene expression changes occurring in breast cancer. So far, these epigenetic contributions to sporadic breast cancer subtypes have not been well characterized, and there is only a limited understanding of the epigenetic mechanisms affected in those particular breast cancer subtypes. The present study was undertaken to dissect the breast cancer methylome and deliver specific epigenotypes associated with particular breast cancer subtypes.

METHODS: Using a microarray approach we analyzed DNA methylation in regulatory regions of 806 cancer related genes in 28 breast cancer paired samples. We subsequently performed substantial technical and biological validation by Pyrosequencing, investigating the top qualifying 19 CpG regions in independent cohorts encompassing 47 basal-like, 44 ERBB2+ overexpressing, 48 luminal A and 48 luminal B paired breast cancer/adjacent tissues. Using all-subset selection method, we identified the most subtype predictive methylation profiles in multivariable logistic regression analysis.

RESULTS: The approach efficiently recognized 15 individual CpG loci differentially methylated in breast cancer tumor subtypes. We further identify novel subtype specific epigenotypes which clearly demonstrate the differences in the methylation profiles of basal-like and human epidermal growth factor 2 (HER2)-overexpressing tumors.

CONCLUSIONS: Our results provide evidence that well defined DNA methylation profiles enables breast cancer subtype prediction and support the utilization of this biomarker for prognostication and therapeutic stratification of patients with breast cancer.

}, keywords = {Aged, Breast Neoplasms, CpG Islands, DNA Methylation, Epigenesis, Genetic, Female, Gene Expression Profiling, Genes, p53, Genotype, Humans, Ki-67 Antigen, Middle Aged, mutation, Neoplasm Grading, Oligonucleotide Array Sequence Analysis, Receptor, ErbB-2, Tumor Suppressor Protein p53}, issn = {1465-542X}, doi = {10.1186/bcr2721}, author = {Bediaga, Naiara G and Acha-Sagredo, Amelia and Guerra, Isabel and Viguri, Amparo and Albaina, Carmen and Ruiz Diaz, Irune and Rezola, Ricardo and Alberdi, Maria Jesus and Dopazo, Joaquin and Montaner, David and Renobales, Mertxe and Fernandez, Agustin F and Field, John K and Fraga, Mario F and Liloglou, Triantafillos and de Pancorbo, Marian M} } @article {18652888, title = {Direct functional assessment of the composite phenotype through multivariate projection strategies}, journal = {Genomics}, volume = {92}, number = {6}, year = {2008}, note = {

Conesa, Ana Bro, Rasmus Garcia-Garcia, Francisco Prats, Jose Manuel Gotz, Stefan Kjeldahl, Karin Montaner, David Dopazo, Joaquin Evaluation Studies Research Support, Non-U.S. Gov{\textquoteright}t United States Genomics Genomics. 2008 Dec;92(6):373-83. Epub 2008 Sep 13.

}, pages = {373-83}, abstract = {

We present a novel approach for the analysis of transcriptomics data that integrates functional annotation of gene sets with expression values in a multivariate fashion, and directly assesses the relation of functional features to a multivariate space of response phenotypical variables. Multivariate projection methods are used to obtain new correlated variables for a set of genes that share a given function. These new functional variables are then related to the response variables of interest. The analysis of the principal directions of the multivariate regression allows for the identification of gene function features correlated with the phenotype. Two different transcriptomics studies are used to illustrate the statistical and interpretative aspects of the methodology. We demonstrate the superiority of the proposed method over equivalent approaches.

}, keywords = {Breast Neoplasms/genetics Computational Biology/*methods Databases, Genetic Female Gene Expression Profiling/*statistics \& numerical data Humans Mathematical Computing Multivariate Analysis Phenotype}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18652888}, author = {A. Conesa and Bro, R. and Garcia-Garcia, F. and Prats, J. M. and Gotz, S. and Kjeldahl, K. and Montaner, D. and Dopazo, J.} } @article {590, title = {Direct functional assessment of the composite phenotype through multivariate projection strategies.}, journal = {Genomics}, volume = {92}, year = {2008}, month = {2008 Dec}, pages = {373-83}, abstract = {

We present a novel approach for the analysis of transcriptomics data that integrates functional annotation of gene sets with expression values in a multivariate fashion, and directly assesses the relation of functional features to a multivariate space of response phenotypical variables. Multivariate projection methods are used to obtain new correlated variables for a set of genes that share a given function. These new functional variables are then related to the response variables of interest. The analysis of the principal directions of the multivariate regression allows for the identification of gene function features correlated with the phenotype. Two different transcriptomics studies are used to illustrate the statistical and interpretative aspects of the methodology. We demonstrate the superiority of the proposed method over equivalent approaches.

}, keywords = {Breast Neoplasms, Computational Biology, Databases, Genetic, Female, Gene Expression Profiling, Humans, Mathematical Computing, Multivariate Analysis, Phenotype}, issn = {1089-8646}, doi = {10.1016/j.ygeno.2008.05.015}, author = {Conesa, Ana and Bro, Rasmus and Garcia-Garcia, Francisco and Prats, Jos{\'e} Manuel and G{\"o}tz, Stefan and Kjeldahl, Karin and Montaner, David and Dopazo, Joaquin} } @article {17478513, title = {DBAli tools: mining the protein structure space}, journal = {Nucleic Acids Res}, volume = {35}, number = {Web Server issue}, year = {2007}, note = {Marti-Renom, Marc A Pieper, Ursula Madhusudhan, M S Rossi, Andrea Eswar, Narayanan Davis, Fred P Al-Shahrour, Fatima Dopazo, Joaquin Sali, Andrej GM 62529/GM/NIGMS NIH HHS/United States GM074929/GM/NIGMS NIH HHS/United States GM54762/GM/NIGMS NIH HHS/United States GM71790/GM/NIGMS NIH HHS/United States Research Support, N.I.H., Extramural Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2007 Jul;35(Web Server issue):W393-7. Epub 2007 May 3.}, pages = {W393-7}, abstract = {The DBAli tools use a comprehensive set of structural alignments in the DBAli database to leverage the structural information deposited in the Protein Data Bank (PDB). These tools include (i) the DBAlit program that allows users to input the 3D coordinates of a protein structure for comparison by MAMMOTH against all chains in the PDB; (ii) the AnnoLite and AnnoLyze programs that annotate a target structure based on its stored relationships to other structures; (iii) the ModClus program that clusters structures by sequence and structure similarities; (iv) the ModDom program that identifies domains as recurrent structural fragments and (v) an implementation of the COMPARER method in the SALIGN command in MODELLER that creates a multiple structure alignment for a set of related protein structures. 
Thus, the DBAli tools, which are freely accessible via the World Wide Web at http://salilab.org/DBAli/, allow users to mine the protein structure space by establishing relationships between protein structures and their functions.}, keywords = {*Algorithms Amino Acid Sequence Computational Biology/*methods Data Interpretation, Amino Acid *Software Structure-Activity Relationship, Protein Internet Molecular Sequence Data Protein Conformation Proteins/*chemistry/classification/*metabolism Pseudomonas aeruginosa/*metabolism Sequence Alignment/*methods Sequence Analysis, Protein/*methods Sequence Homology, Statistical *Databases}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17478513}, author = {M. A. Marti-Renom and Pieper, U. and Madhusudhan, M. S. and Rossi, A. and Eswar, N. and Davis, F. P. and Fatima Al-Shahrour and Dopazo, J. and Sali, A.} } @article {603, title = {DBAli tools: mining the protein structure space.}, journal = {Nucleic Acids Res}, volume = {35}, year = {2007}, month = {2007 Jul}, pages = {W393-7}, abstract = {

The DBAli tools use a comprehensive set of structural alignments in the DBAli database to leverage the structural information deposited in the Protein Data Bank (PDB). These tools include (i) the DBAlit program that allows users to input the 3D coordinates of a protein structure for comparison by MAMMOTH against all chains in the PDB; (ii) the AnnoLite and AnnoLyze programs that annotate a target structure based on its stored relationships to other structures; (iii) the ModClus program that clusters structures by sequence and structure similarities; (iv) the ModDom program that identifies domains as recurrent structural fragments and (v) an implementation of the COMPARER method in the SALIGN command in MODELLER that creates a multiple structure alignment for a set of related protein structures. Thus, the DBAli tools, which are freely accessible via the World Wide Web at http://salilab.org/DBAli/, allow users to mine the protein structure space by establishing relationships between protein structures and their functions.

}, keywords = {Algorithms, Amino Acid Sequence, Computational Biology, Data Interpretation, Statistical, Databases, Protein, Internet, Molecular Sequence Data, Protein Conformation, Proteins, Pseudomonas aeruginosa, Sequence Alignment, Sequence Analysis, Protein, Sequence Homology, Amino Acid, Software, Structure-Activity Relationship}, issn = {1362-4962}, doi = {10.1093/nar/gkm236}, author = {Marti-Renom, Marc A and Pieper, Ursula and Madhusudhan, M S and Rossi, Andrea and Eswar, Narayanan and Davis, Fred P and Al-Shahrour, F{\'a}tima and Dopazo, Joaquin and Sali, Andrej} } @article {17519250, title = {Discovering gene expression patterns in time course microarray experiments by ANOVA-SCA}, journal = {Bioinformatics}, volume = {23}, number = {14}, year = {2007}, note = {Nueda, Maria Jose Conesa, Ana Westerhuis, Johan A Hoefsloot, Huub C J Smilde, Age K Talon, Manuel Ferrer, Alberto Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2007 Jul 15;23(14):1792-800. Epub 2007 May 22.}, pages = {1792-800}, abstract = {MOTIVATION: Designed microarray experiments are used to investigate the effects that controlled experimental factors have on gene expression and learn about the transcriptional responses associated with external variables. In these datasets, signals of interest coexist with varying sources of unwanted noise in a framework of (co)relation among the measured variables and with the different levels of the studied factors. Discovering experimentally relevant transcriptional changes require methodologies that take all these elements into account. RESULTS: In this work, we develop the application of the Analysis of variance-simultaneous component analysis (ANOVA-SCA) Smilde et al. Bioinformatics, (2005) to the analysis of multiple series time course microarray data as an example of multifactorial gene expression profiling experiments. We denoted this implementation as ASCA-genes. 
We show how the combination of ANOVA-modeling and a dimension reduction technique is effective in extracting targeted signals from data by-passing structural noise. The methodology is valuable for identifying main and secondary responses associated with the experimental factors and spotting relevant experimental conditions. We additionally propose a novel approach for gene selection in the context of the relation of individual transcriptional patterns to global gene expression signals. We demonstrate the methodology on both real and synthetic datasets. AVAILABILITY: ASCA-genes has been implemented in the statistical language R and is available at http://www.ivia.es/centrodegenomica/bioinformatics.htm. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, keywords = {Algorithms *Analysis of Variance Computational Biology/*methods Computer Simulation Data Interpretation, Genetic, Genetic Models, Statistical Gene Expression Profiling/*methods Models, Statistical Oligonucleotide Array Sequence Analysis/*methods Principal Component Analysis Time Factors Transcription}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17519250}, author = {Nueda, M. J. and A. Conesa and Westerhuis, J. A. and Hoefsloot, H. C. and Smilde, A. K. and Talon, M. and Ferrer, A.} } @article {17120584, title = {Development of the GENIPOL European flounder (Platichthys flesus) microarray and determination of temporal transcriptional responses to cadmium at low dose}, journal = {Environ Sci Technol}, volume = {40}, number = {20}, year = {2006}, note = {Williams, Tim D Diab, Amer M George, Stephen G Godfrey, Rita E Sabine, Victoria Conesa, Ana Minchin, Steven D Watts, Phil C Chipman, James K Research Support, Non-U.S. Gov{\textquoteright}t United States Environmental science \& technology Environ Sci Technol. 
2006 Oct 15;40(20):6479-88.}, pages = {6479-88}, abstract = {We have constructed a high density, 13 270-clone cDNA array for the sentinel fish species European flounder (Platichthys flesus), combining clones from suppressive subtractive hybridization and a liver cDNA library; DNA sequences of 5211 clones were determined. Fish were treated by single intraperitoneal injection with 50 micrograms cadmium chloride per kilogram body weight, a dose relevant to environmental exposures, and hepatic gene expression changes were determined at 1, 2, 4, 8, and 16 days postinjection in comparison to saline-treated controls. Gene expression responses were confirmed by real-time reverse transcription polymerase chain reaction (RT-PCR). Blast2GO gene ontology analysis highlighted a general induction of the unfolded protein response, response to oxidative stress, protein synthesis, transport, and degradation pathways, while apoptosis, cell cycle, cytoskeleton, and cytokine genes were also affected. Transcript levels of cytochrome P450 1A (CYP1A) were repressed and vitellogenin altered, real-time PCR showed induction of metallothionein. We thus describe the establishment of a useful resource for ecotoxicogenomics and the determination of the temporal molecular responses to cadmium, a prototypical heavy metal pollutant.}, keywords = {Animals Cadmium Chloride/administration \& dosage/*pharmacology Dose-Response Relationship, Developmental/drug effects Liver/drug effects/growth \& development/metabolism Oligonucleotide Array Sequence Analysis/*methods Reverse Transcriptase Polymerase Chain Reaction Transcription, Drug Environmental Monitoring/methods Flounder/*genetics/growth \& development Gene Expression Profiling Gene Expression Regulation, Genetic/*drug effects}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17120584}, author = {Williams, T. D. and Diab, A. M. and George, S. G. and Godfrey, R. E. and Sabine, V. and A. 
Conesa and Minchin, S. D. and Watts, P. C. and Chipman, J. K.} }

% Journal articles below use the PubMed ID as citation key; the same ID appears
% as list_uids in each url field. The note fields hold the raw PubMed citation
% dump and the keywords fields hold the exported MeSH terms, both kept verbatim.

% Meeting report (see abstract).
@article{16522224,
  author   = {Dopazo, J. and Aloy, P.},
  title    = {Discovery and hypothesis generation through bioinformatics},
  journal  = {Genome Biol},
  volume   = {7},
  number   = {2},
  pages    = {307},
  year     = {2006},
  note     = {Dopazo, Joaquin Aloy, Patrick Congresses England Genome biology Genome Biol. 2006;7(2):307. Epub 2006 Feb 27.},
  abstract = {A report on the 4th European Conference on Computational Biology and the 6th Spanish Annual Meeting on Bioinformatics, Madrid, Spain, 28 September-1 October 2005.},
  keywords = {*Computational Biology Genome, Genetic Phylogeny, Human *Genomics Humans *Models},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=16522224},
}

% Whole volume that entry 489 is a chapter of (489's booktitle equals this title).
% NOTE(review): Azuaje and Dopazo are taken to be the volume editors because their
% names were embedded in the exported publisher string -- confirm against the book.
@book{482,
  editor    = {Azuaje, F. and Dopazo, J.},
  title     = {Data analysis and visualisation in genomics and proteomics},
  publisher = {Wiley},
  year      = {2005},
  keywords  = {babelomics},
}

% Chapter with its own author list inside the edited volume 482.
% NOTE(review): chapter number / page range are not available here -- add when known.
@incollection{489,
  author    = {Azuaje, F. and Dopazo, J. and Wang, H.},
  title     = {Data and Predictive Model Integration: an Overview of Key Concepts, Problems and Solutions},
  booktitle = {Data analysis and visualisation in genomics and proteomics},
  editor    = {Azuaje, F. and Dopazo, J.},
  publisher = {Wiley},
  year      = {2005},
}

@article{15883372,
  author   = {Espadaler, J. and Aragues, R. and Eswar, N. and Marti-Renom, M. A. and Querol, E. and Aviles, F. X. and Sali, A. and Oliva, B.},
  title    = {Detecting remotely related proteins by their interactions and sequence similarity},
  journal  = {Proc Natl Acad Sci U S A},
  volume   = {102},
  number   = {20},
  pages    = {7151--7156},
  year     = {2005},
  note     = {Espadaler, Jordi Aragues, Ramon Eswar, Narayanan Marti-Renom, Marc A Querol, Enrique Aviles, Francesc X Sali, Andrej Oliva, Baldomero R01 GM54762/GM/NIGMS NIH HHS/United States Comparative Study Research Support, N.I.H., Extramural Research Support, Non-U.S. Gov{\textquoteright}t Research Support, U.S. Gov{\textquoteright}t, P.H.S. United States Proceedings of the National Academy of Sciences of the United States of America Proc Natl Acad Sci U S A. 2005 May 17;102(20):7151-6. Epub 2005 May 9.},
  abstract = {The function of an uncharacterized protein is usually inferred either from its homology to, or its interactions with, characterized proteins. Here, we use both sequence similarity and protein interactions to identify relationships between remotely related protein sequences. We rely on the fact that homologous sequences share similar interactions, and, therefore, the set of interacting partners of the partners of a given protein is enriched by its homologs. The approach was bench-marked by assigning the fold and functional family to test sequences of known structure. Specifically, we relied on 1,434 proteins with known folds, as defined in the Structural Classification of Proteins (SCOP) database, and with known interacting partners, as defined in the Database of Interacting Proteins (DIP). For this subset, the specificity of fold assignment was increased from 54\% for position-specific iterative BLAST to 75\% for our approach, with a concomitant increase in sensitivity for a few percentage points. Similarly, the specificity of family assignment at the e-value threshold of 10(-8) was increased from 70\% to 87\%. The proposed method would be a useful tool for large-scale automated discovery of remote relationships between protein sequences, given its unique reliance on sequence similarity and protein-protein interactions.},
  keywords = {Amino Acid, Computational Biology Databases, Molecular Protein Conformation Protein Folding Proteins/*genetics/*metabolism Proteomics/*methods *Sequence Homology, Protein *Evolution},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15883372},
}

% EST and cDNA are brace-protected so sentence-casing styles keep their capitals.
@article{15830128,
  author   = {Forment, J. and Gadea, J. and Huerta, L. and Abizanda, L. and Agusti, J. and Alamar, S. and Alos, E. and Andres, F. and Arribas, R. and Beltran, J. P. and Berbel, A. and Blazquez, M. A. and Brumos, J. and Canas, L. A. and Cercos, M. and Colmenero-Flores, J. M. and Conesa, A. and Estables, B. and Gandia, M. and Garcia-Martinez, J. L. and Gimeno, J. and Gisbert, A. and Gomez, G. and Gonzalez-Candelas, L. and Granell, A. and Guerri, J. and Lafuente, M. T. and Madueno, F. and Marcos, J. F. and Marques, M. C. and Martinez, F. and Martinez-Godoy, M. A. and Miralles, S. and Moreno, P. and Navarro, L. and Pallas, V. and Perez-Amador, M. A. and Perez-Valle, J. and Pons, C. and Rodrigo, I. and Rodriguez, P. L. and Royo, C. and Serrano, R. and Soler, G. and Tadeo, F. and Talon, M. and Terol, J. and Trenor, M. and Vaello, L. and Vicente, O. and Vidal, Ch and Zacarias, L. and Conejero, V.},
  title    = {Development of a citrus genome-wide {EST} collection and {cDNA} microarray as resources for genomic studies},
  journal  = {Plant Mol Biol},
  volume   = {57},
  number   = {3},
  pages    = {375--391},
  year     = {2005},
  note     = {Forment, J Gadea, J Huerta, L Abizanda, L Agusti, J Alamar, S Alos, E Andres, F Arribas, R Beltran, J P Berbel, A Blazquez, M A Brumos, J Canas, L A Cercos, M Colmenero-Flores, J M Conesa, A Estables, B Gandia, M Garcia-Martinez, J L Gimeno, J Gisbert, A Gomez, G Gonzalez-Candelas, L Granell, A Guerri, J Lafuente, M T Madueno, F Marcos, J F Marques, M C Martinez, F Martinez-Godoy, M A Miralles, S Moreno, P Navarro, L Pallas, V Perez-Amador, M A Perez-Valle, J Pons, C Rodrigo, I Rodriguez, P L Royo, C Serrano, R Soler, G Tadeo, F Talon, M Terol, J Trenor, M Vaello, L Vicente, O Vidal, Ch Zacarias, L Conejero, V Comparative Study Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S. Netherlands Plant molecular biology Plant Mol Biol. 2005 Feb;57(3):375-91.},
  abstract = {A functional genomics project has been initiated to approach the molecular characterization of the main biological and agronomical traits of citrus. As a key part of this project, a citrus EST collection has been generated from 25 cDNA libraries covering different tissues, developmental stages and stress conditions. The collection includes a total of 22,635 high-quality ESTs, grouped in 11,836 putative unigenes, which represent at least one third of the estimated number of genes in the citrus genome. Functional annotation of unigenes which have Arabidopsis orthologues (68\% of all unigenes) revealed gene representation in every major functional category, suggesting that a genome-wide EST collection was obtained. A Citrus clementina Hort. ex Tan. cv. Clemenules genomic library, that will contribute to further characterization of relevant genes, has also been constructed. To initiate the analysis of citrus transcriptome, we have developed a cDNA microarray containing 12,672 probes corresponding to 6875 putative unigenes of the collection. Technical characterization of the microarray showed high intra- and inter-array reproducibility, as well as a good range of sensitivity. We have also validated gene expression data achieved with this microarray through an independent technique such as RNA gel blot analysis.},
  keywords = {Citrus/*genetics DNA, Complementary/chemistry/genetics *Expressed Sequence Tags Gene Expression Profiling Gene Library *Genome, DNA, Plant Genomics/*methods Molecular Sequence Data Oligonucleotide Array Sequence Analysis/*methods RNA, Plant/genetics/metabolism Reproducibility of Results Sequence Analysis},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15830128},
}

@article{15840702,
  author   = {Al-Shahrour, Fatima and Diaz-Uriarte, R. and Dopazo, J.},
  title    = {Discovering molecular functions significantly related to phenotypes by combining gene expression data and biological information},
  journal  = {Bioinformatics},
  volume   = {21},
  number   = {13},
  pages    = {2988--2993},
  year     = {2005},
  note     = {Al-Shahrour, Fatima Diaz-Uriarte, Ramon Dopazo, Joaquin Evaluation Studies Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2005 Jul 1;21(13):2988-93. Epub 2005 Apr 19.},
  abstract = {MOTIVATION: The analysis of genome-scale data from different high throughput techniques can be used to obtain lists of genes ordered according to their different behaviours under distinct experimental conditions corresponding to different phenotypes (e.g. differential gene expression between diseased samples and controls, different response to a drug, etc.). The order in which the genes appear in the list is a consequence of the biological roles that the genes play within the cell, which account, at molecular scale, for the macroscopic differences observed between the phenotypes studied. Typically, two steps are followed for understanding the biological processes that differentiate phenotypes at molecular level: first, genes with significant differential expression are selected on the basis of their experimental values and subsequently, the functional properties of these genes are analysed. Instead, we present a simple procedure which combines experimental measurements with available biological information in a way that genes are simultaneously tested in groups related by common functional properties. The method proposed constitutes a very sensitive tool for selecting genes with significant differential behaviour in the experimental conditions tested. RESULTS: We propose the use of a method to scan ordered lists of genes. The method allows the understanding of the biological processes operating at molecular level behind the macroscopic experiment from which the list was generated. This procedure can be useful in situations where it is not possible to obtain statistically significant differences based on the experimental measurements (e.g. low prevalence diseases, etc.). Two examples demonstrate its application in two microarray experiments and the type of information that can be extracted.},
  keywords = {babelomics, Biological Neoplasm Proteins/genetics/*metabolism Phenotype Software Structure-Activity Relationship Systems Integration Tumor Markers, Biological/genetics/*metabolism, Breast Neoplasms/genetics/*metabolism Computer Simulation *Database Management Systems *Databases, Protein Documentation/methods Gene Expression Profiling/*methods Humans *Models},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15840702},
}

% DNMAD is brace-protected so sentence-casing styles do not render it as Dnmad.
@article{15247094,
  author   = {Vaquerizas, J. M. and Dopazo, J. and Diaz-Uriarte, R.},
  title    = {{DNMAD}: web-based diagnosis and normalization for microarray data},
  journal  = {Bioinformatics},
  volume   = {20},
  number   = {18},
  pages    = {3656--3658},
  year     = {2004},
  note     = {Vaquerizas, Juan M Dopazo, Joaquin Diaz-Uriarte, Ramon Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2004 Dec 12;20(18):3656-8. Epub 2004 Jul 9.},
  abstract = {SUMMARY: We present a web server for Diagnosis and Normalization of MicroArray Data (DNMAD). DNMAD includes several common data transformations such as spatial and global robust local regression or multiple slide normalization, and allows for detecting several kinds of errors that result from the manipulation and the image analysis of the arrays. This tool offers a user-friendly interface, and is completely integrated within the Gene Expression Pattern Analysis Suite (GEPAS). AVAILABILITY: The tool is accessible on-line at http://dnmad.bioinfo.cnio.es.},
  keywords = {Algorithms Database Management Systems Gene Expression Profiling/*methods/standards Information Storage and Retrieval/*methods *Internet Oligonucleotide Array Sequence Analysis/*methods/standards Sequence Alignment/methods Sequence Analysis, DNA/*methods *Software *User-Computer Interface},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15247094},
}

% DBAli is brace-protected to keep its mixed-case spelling under sentence casing.
@article{11524379,
  author   = {Marti-Renom, M. A. and Ilyin, V. A. and Sali, A.},
  title    = {{DBAli}: a database of protein structure alignments},
  journal  = {Bioinformatics},
  volume   = {17},
  number   = {8},
  pages    = {746--747},
  year     = {2001},
  note     = {Marti-Renom, M A Ilyin, V A Sali, A Research Support, Non-U.S. Gov{\textquoteright}t Research Support, U.S. Gov{\textquoteright}t, P.H.S. England Bioinformatics (Oxford, England) Bioinformatics. 2001 Aug;17(8):746-7.},
  abstract = {SUMMARY: The DBAli database includes approximately 35000 alignments of pairs of protein structures from SCOP (Lo Conte et al., Nucleic Acids Res., 28, 257-259, 2000) and CE (Shindyalov and Bourne, Protein Eng., 11, 739-747, 1998). DBAli is linked to several resources, including Compare3D (Shindyalov and Bourne, http://www.sdsc.edu/pb/software.htm, 1999) and ModView (Ilyin and Sali, http://guitar.rockefeller.edu/ModView/, 2001) for visualizing sequence alignments and structure superpositions. A flexible search of DBAli by protein sequence and structure properties allows construction of subsets of alignments suitable for a number of applications, such as benchmarking of sequence-sequence and sequence-structure alignment methods under a variety of conditions. AVAILABILITY: http://guitar.rockefeller.edu/DBAli/},
  keywords = {Computational Biology *Databases, Protein Proteins/*chemistry/*genetics Sequence Alignment/*statistics \& numerical data Software Software Design},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=11524379},
}