@article {805, title = {Drug-target identification in COVID-19 disease mechanisms using computational systems biology approaches.}, journal = {Front Immunol}, volume = {14}, year = {2024}, month = {2024 Feb 13}, pages = {1282859}, abstract = {

INTRODUCTION: The COVID-19 Disease Map project is a large-scale community effort uniting 277 scientists from 130 Institutions around the globe. We use high-quality, mechanistic content describing SARS-CoV-2-host interactions and develop interoperable bioinformatic pipelines for novel target identification and drug repurposing.

METHODS: Extensive community work allowed an impressive step forward in building interfaces between Systems Biology tools and platforms. Our framework can link biomolecules from omics data analysis and computational modelling to dysregulated pathways in a cell-, tissue- or patient-specific manner. Drug repurposing using text mining and AI-assisted analysis identified potential drugs, chemicals and microRNAs that could target the identified key factors.

RESULTS: Results revealed drugs already tested for anti-COVID-19 efficacy, providing a mechanistic context for their mode of action, and drugs already in clinical trials for treating other diseases, never tested against COVID-19.

DISCUSSION: The key advance is that the proposed framework is versatile and expandable, offering a significant upgrade in the arsenal for virus-host interactions and other complex pathologies.

}, keywords = {Computer Simulation, COVID-19, drug repositioning, Humans, SARS-CoV-2, Systems biology}, issn = {1664-3224}, doi = {10.3389/fimmu.2023.1282859}, author = {Niarakis, Anna and Ostaszewski, Marek and Mazein, Alexander and Kuperstein, Inna and Kutmon, Martina and Gillespie, Marc E and Funahashi, Akira and Acencio, Marcio Luis and Hemedan, Ahmed and Aichem, Michael and Klein, Karsten and Czauderna, Tobias and Burtscher, Felicia and Yamada, Takahiro G and Hiki, Yusuke and Hiroi, Noriko F and Hu, Finterly and Pham, Nhung and Ehrhart, Friederike and Willighagen, Egon L and Valdeolivas, Alberto and Dugourd, Aur{\'e}lien and Messina, Francesco and Esteban-Medina, Marina and Pe{\~n}a-Chilet, Maria and Rian, Kinza and Soliman, Sylvain and Aghamiri, Sara Sadat and Puniya, Bhanwar Lal and Naldi, Aur{\'e}lien and Helikar, Tom{\'a}{\v s} and Singh, Vidisha and Fern{\'a}ndez, Marco Fari{\~n}as and Bermudez, Viviam and Tsirvouli, Eirini and Montagud, Arnau and No{\"e}l, Vincent and Ponce-de-Leon, Miguel and Maier, Dieter and Bauch, Angela and Gyori, Benjamin M and Bachman, John A and Luna, Augustin and Pi{\~n}ero, Janet and Furlong, Laura I and Balaur, Irina and Rougny, Adrien and Jarosz, Yohan and Overall, Rupert W and Phair, Robert and Perfetto, Livia and Matthews, Lisa and Rex, Devasahayam Arokia Balaya and Orlic-Milacic, Marija and Gomez, Luis Cristobal Monraz and De Meulder, Bertrand and Ravel, Jean Marie and Jassal, Bijay and Satagopam, Venkata and Wu, Guanming and Golebiewski, Martin and Gawron, Piotr and Calzone, Laurence and Beckmann, Jacques S and Evelo, Chris T and D{\textquoteright}Eustachio, Peter and Schreiber, Falk and Saez-Rodriguez, Julio and Dopazo, Joaquin and Kuiper, Martin and Valencia, Alfonso and Wolkenhauer, Olaf and Kitano, Hiroaki and Barillot, Emmanuel and Auffray, Charles and Balling, Rudi and Schneider, Reinhard} } @article {770, title = {A Comprehensive Analysis of 21 Actionable Pharmacogenes in the Spanish Population: From Genetic 
Characterisation to Clinical Impact.}, journal = {Pharmaceutics}, volume = {15}, year = {2023}, month = {2023 Apr 19}, abstract = {

The implementation of pharmacogenetics (PGx) is one of the main milestones of precision medicine nowadays in order to achieve safer and more effective therapies. Nevertheless, the implementation of PGx diagnostics is extremely slow and unequal worldwide, in part due to a lack of ethnic PGx information. We analysed genetic data from 3006 Spanish individuals obtained by different high-throughput (HT) techniques. Allele frequencies were determined in our population for the main 21 actionable PGx genes associated with therapeutical changes. We found that 98\% of the Spanish population harbours at least one allele associated with a therapeutical change and, thus, there would be a need for a therapeutical change in a mean of 3.31 of the 64 associated drugs. We also identified 326 putative deleterious variants that were not previously related with PGx in 18 out of the 21 main PGx genes evaluated and a total of 7122 putative deleterious variants for the 1045 PGx genes described. Additionally, we performed a comparison of the main HT diagnostic techniques, revealing that after whole genome sequencing, genotyping with the PGx HT array is the most suitable solution for PGx diagnostics. Finally, all this information was integrated in the Collaborative Spanish Variant Server to be available to and updated by the scientific community.

}, issn = {1999-4923}, doi = {10.3390/pharmaceutics15041286}, author = {N{\'u}{\~n}ez-Torres, Roc{\'\i}o and Pita, Guillermo and Pe{\~n}a-Chilet, Maria and L{\'o}pez-L{\'o}pez, Daniel and Zamora, Jorge and Rold{\'a}n, Gema and Herr{\'a}ez, Bel{\'e}n and Alvarez, Nuria and Alonso, Mar{\'\i}a Rosario and Dopazo, Joaquin and Gonz{\'a}lez-Neira, Anna} } @article {775, title = {microRNAs-mediated regulation of insulin signaling in white adipose tissue during aging: Role of caloric restriction.}, journal = {Aging Cell}, year = {2023}, month = {2023 Jul 04}, pages = {e13919}, abstract = {

Caloric restriction is a non-pharmacological intervention known to ameliorate the metabolic defects associated with aging, including insulin resistance. The levels of miRNA expression may represent a predictive tool for aging-related alterations. In order to investigate the role of miRNAs underlying insulin resistance in adipose tissue during the early stages of aging, 3- and 12-month-old male animals fed ad libitum, and 12-month-old male animals fed with a 20\% caloric restricted diet were used. In this work we demonstrate that specific miRNAs may contribute to the impaired insulin-stimulated glucose metabolism specifically in the subcutaneous white adipose tissue, through the regulation of target genes implicated in the insulin signaling cascade. Moreover, the expression of these miRNAs is modified by caloric restriction in middle-aged animals, in accordance with the improvement of the metabolic state. Overall, our work demonstrates that alterations in posttranscriptional gene expression because of miRNAs dysregulation might represent an endogenous mechanism by which insulin response in the subcutaneous fat depot is already affected at middle age. Importantly, caloric restriction could prevent this modulation, demonstrating that certain miRNAs could constitute potential biomarkers of age-related metabolic alterations.

}, issn = {1474-9726}, doi = {10.1111/acel.13919}, author = {Corrales, Patricia and Martin-Taboada, Marina and Vivas-Garc{\'\i}a, Yurena and Torres, Lucia and Ramirez-Jimenez, Laura and Lopez, Yamila and Horrillo, Daniel and Vila-Bedmar, Rocio and Barber-Cano, Eloisa and Izquierdo-Lahuerta, Adriana and Pe{\~n}a-Chilet, Maria and Mart{\'\i}nez, Carmen and Dopazo, Joaquin and Ros, Manuel and Medina-Gomez, Gema} } @article {764, title = {Polystyrene nanoplastics affect transcriptomic and epigenomic signatures of human fibroblasts and derived induced pluripotent stem cells: Implications for human health.}, journal = {Environ Pollut}, year = {2023}, month = {2022 Dec 09}, pages = {120849}, abstract = {

Plastic pollution is increasing at an alarming rate yet the impact of this pollution on human health is poorly understood. Because human induced pluripotent stem cells (hiPSC) are frequently derived from dermal fibroblasts, these cells offer a powerful platform for the identification of molecular biomarkers of environmental pollution in human cells. Here, we describe a novel proof-of-concept for deriving hiPSC from human dermal fibroblasts deliberately exposed to polystyrene (PS) nanoplastic particles; unexposed hiPSC served as controls. In parallel, unexposed hiPSC were exposed to low and high concentrations of PS nanoparticles. Transcriptomic and epigenomic signatures of all fibroblasts and hiPSCs were defined using RNA-seq and whole genome methyl-seq, respectively. Both PS-treated fibroblasts and derived hiPSC showed alterations in expression of ESRRB and HNF1A genes and circuits involved in the pluripotency of stem cells, as well as in pathways involved in cancer, inflammatory disorders, gluconeogenesis, carbohydrate metabolism, innate immunity, and dopaminergic synapse. Similarly, the expression levels of identified key transcriptional and DNA methylation changes (DNMT3A, ESRRB, FAM133CP, HNF1A, SEPTIN7P8, and TTC34) were significantly affected in both PS-exposed fibroblasts and hiPSC. This study illustrates the power of human cellular models of environmental pollution to narrow down and prioritize the list of candidate molecular biomarkers of environmental pollution. This knowledge will facilitate the deciphering of the origins of environmental diseases.

}, issn = {1873-6424}, doi = {10.1016/j.envpol.2022.120849}, author = {Stojkovic, Miodrag and Ortu{\~n}o Guzm{\'a}n, Francisco Manuel and Han, Dongjun and Stojkovic, Petra and Dopazo, Joaquin and Stankovic, Konstantina M} } @article {795, title = {Visualization of automatically combined disease maps and pathway diagrams for rare diseases.}, journal = {Front Bioinform}, volume = {3}, year = {2023}, month = {2023}, pages = {1101505}, abstract = {

Investigation of molecular mechanisms of human disorders, especially rare diseases, requires exploration of various knowledge repositories for building precise hypotheses and complex data interpretation. Recently, increasingly more resources offer diagrammatic representation of such mechanisms, including disease-dedicated schematics in pathway databases and disease maps. However, collection of knowledge across them is challenging, especially for research projects with limited manpower. In this article we present an automated workflow for construction of maps of molecular mechanisms for rare diseases. The workflow requires a standardized definition of a disease using Orphanet or HPO identifiers to collect relevant genes and variants, and to assemble a functional, visual repository of related mechanisms, including data overlays. The diagrams composing the final map are unified to a common systems biology format from CellDesigner SBML, GPML and SBML+layout+render. The constructed resource contains disease-relevant genes and variants as data overlays for immediate visual exploration, including embedded genetic variant browser and protein structure viewer. We demonstrate the functionality of our workflow on two examples of rare diseases: Kawasaki disease and retinitis pigmentosa. Two maps are constructed based on their corresponding identifiers. Moreover, for the retinitis pigmentosa use-case, we include a list of differentially expressed genes to demonstrate how to tailor the workflow using omics datasets. In summary, our work allows for an ad-hoc construction of molecular diagrams combined from different sources, preserving their layout and graphical style, but integrating them into a single resource. This reduces the time-consuming task of prototyping a molecular disease map, enabling visual exploration, hypothesis building, data visualization and further refinement.
The code of the workflow is open and accessible at https://gitlab.lcsb.uni.lu/minerva/automap/.

}, issn = {2673-7647}, doi = {10.3389/fbinf.2023.1101505}, author = {Gawron, Piotr and Hoksza, David and Pi{\~n}ero, Janet and Pe{\~n}a-Chilet, Maria and Esteban-Medina, Marina and Fernandez-Rueda, Jose Luis and Colonna, Vincenza and Smula, Ewa and Heirendt, Laurent and Ancien, Fran{\c c}ois and Grou{\`e}s, Valentin and Satagopam, Venkata P and Schneider, Reinhard and Dopazo, Joaquin and Furlong, Laura I and Ostaszewski, Marek} } @article {750, title = {CIBERER: Spanish National Network for Research on Rare Diseases: a highly productive collaborative initiative.}, journal = {Clin Genet}, year = {2022}, month = {2022 Jan 20}, abstract = {

CIBER (Center for Biomedical Network Research; Centro de Investigaci{\'o}n Biom{\'e}dica En Red) is a public national consortium created in 2006 under the umbrella of the Spanish National Institute of Health Carlos III (ISCIII). This innovative research structure comprises 11 different specific areas dedicated to the main public health priorities in the National Health System. CIBERER, the thematic area of CIBER focused on Rare Diseases currently consists of 75 research groups belonging to universities, research centers and hospitals of the entire country. CIBERER{\textquoteright}s mission is to be a center prioritizing and favoring collaboration and cooperation between biomedical and clinical research groups, with special emphasis on the aspects of genetic, molecular, biochemical and cellular research of rare diseases. This research is the basis for providing new tools for the diagnosis and therapy of low-prevalence diseases, in line with the International Rare Diseases Research Consortium (IRDiRC) objectives, thus favoring translational research between the scientific environment of the laboratory and the clinical setting of health centers. In this paper, we intend to review CIBERER{\textquoteright}s 15-year journey and summarize the main results obtained in terms of internationalization, scientific production, contributions towards the discovery of new therapies and novel genes associated to diseases, cooperation with patients{\textquoteright} associations and many other topics related to rare disease research. This article is protected by copyright. All rights reserved.

}, issn = {1399-0004}, doi = {10.1111/cge.14113}, author = {Luque, Juan and Mendes, Ingrid and G{\'o}mez, Beatriz and Morte, Beatriz and de Heredia, Miguel L{\'o}pez and Herreras, Enrique and Corrochano, Virginia and Bueren, Juan and Gallano, Pia and Artuch, Rafael and Fillat, Cristina and P{\'e}rez-Jurado, Luis A and Montoliu, Lluis and Carracedo, {\'A}ngel and Mill{\'a}n, Jos{\'e} M and Webb, Susan M and Palau, Francesc and Lapunzina, Pablo} } @article {760, title = {Novel genes and sex differences in COVID-19 severity.}, journal = {Hum Mol Genet}, year = {2022}, month = {2022 Jun 16}, abstract = {

Here we describe the results of a genome-wide study conducted in 11 939 COVID-19 positive cases with extensive clinical information that were recruited from 34 hospitals across Spain (SCOURGE consortium). In sex-disaggregated genome-wide association studies for COVID-19 hospitalization, genome-wide significance ($p < 5\times10^{-8}$) was crossed for variants in 3p21.31 and 21q22.11 loci only among males ($p = 1.3\times10^{-22}$ and $p = 8.1\times10^{-12}$, respectively), and for variants in 9q21.32 near TLE1 only among females ($p = 4.4\times10^{-8}$). In a second phase, results were combined with an independent Spanish cohort (1598 COVID-19 cases and 1068 population controls), revealing in the overall analysis two novel risk loci in 9p13.3 and 19q13.12, with fine-mapping prioritized variants functionally associated with AQP3 ($p = 2.7\times10^{-8}$) and ARHGAP33 ($p = 1.3\times10^{-8}$), respectively. The meta-analysis of both phases with four European studies stratified by sex from the Host Genetics Initiative confirmed the association of the 3p21.31 and 21q22.11 loci predominantly in males and replicated a recently reported variant in 11p13 (ELF5, $p = 4.1\times10^{-8}$). Six of the COVID-19 HGI discovered loci were replicated and an HGI-based genetic risk score predicted the severity strata in SCOURGE. We also found more SNP-heritability and larger heritability differences by age ($<$60 or $\geq$60~years) among males than among females. Parallel genome-wide screening of inbreeding depression in SCOURGE also showed an effect of homozygosity in COVID-19 hospitalization and severity and this effect was stronger among older males. In summary, new candidate genes for COVID-19 severity and evidence supporting genetic disparities among sexes are provided.

}, issn = {1460-2083}, doi = {10.1093/hmg/ddac132}, author = {Cruz, Raquel and Almeida, Silvia Diz-de and Heredia, Miguel L{\'o}pez and Quintela, In{\'e}s and Ceballos, Francisco C and Pita, Guillermo and Lorenzo-Salazar, Jos{\'e} M and Gonz{\'a}lez-Montelongo, Rafaela and Gago-Dom{\'\i}nguez, Manuela and Porras, Marta Sevilla and Casta{\~n}o, Jair Antonio Tenorio and Nevado, Juli{\'a}n and Aguado, Jose Mar{\'\i}a and Aguilar, Carlos and Aguilera-Albesa, Sergio and Almadana, Virginia and Almoguera, Berta and Alvarez, Nuria and Andreu-Bernabeu, {\'A}lvaro and Arana-Arri, Eunate and Arango, Celso and Arranz, Mar{\'\i}a J and Artiga, Maria-Jesus and Baptista-Rosas, Ra{\'u}l C and Barreda-S{\'a}nchez, Mar{\'\i}a and Belhassen-Garcia, Moncef and Bezerra, Joao F and Bezerra, Marcos A C and Boix-Palop, Luc{\'\i}a and Bri{\'o}n, Maria and Brugada, Ram{\'o}n and Bustos, Matilde and Calder{\'o}n, Enrique J and Carbonell, Cristina and Castano, Luis and Castelao, Jose E and Conde-Vicente, Rosa and Cordero-Lorenzana, M Lourdes and Cortes-Sanchez, Jose L and Corton, Marta and Darnaude, M Teresa and De Martino-Rodr{\'\i}guez, Alba and Campo-P{\'e}rez, Victor and Bustamante, Aranzazu Diaz and Dom{\'\i}nguez-Garrido, Elena and Luchessi, Andr{\'e} D and Eir{\'o}s, Roc{\'\i}o and Sanabria, Gladys Mercedes Estigarribia and Fari{\~n}as, Mar{\'\i}a Carmen and Fern{\'a}ndez-Robelo, Ux{\'\i}a and Fern{\'a}ndez-Rodr{\'\i}guez, Amanda and Fern{\'a}ndez-Villa, Tania and Gil-Fournier, Bel{\'e}n and G{\'o}mez-Arrue, Javier and {\'A}lvarez, Beatriz Gonz{\'a}lez and Quir{\'o}s, Fernan Gonzalez Bernaldo and Gonz{\'a}lez-Pe{\~n}as, Javier and Guti{\'e}rrez-Bautista, Juan F and Herrero, Mar{\'\i}a Jos{\'e} and Herrero-Gonzalez, Antonio and Jimenez-Sousa, Mar{\'\i}a A and Lattig, Mar{\'\i}a Claudia and Borja, Anabel Liger and Lopez-Rodriguez, Rosario and Mancebo, Esther and Mart{\'\i}n-L{\'o}pez, Caridad and Mart{\'\i}n, Vicente and Martinez-Nieto, Oscar and Martinez-Lopez, Iciar and 
Martinez-Resendez, Michel F and Martinez-Perez, {\'A}ngel and Mazzeu, Juliana A and Mac{\'\i}as, Eleuterio Merayo and Minguez, Pablo and Cuerda, Victor Moreno and Silbiger, Vivian N and Oliveira, Silviene F and Ortega-Paino, Eva and Parellada, Mara and Paz-Artal, Estela and Santos, Ney P C and P{\'e}rez-Matute, Patricia and Perez, Patricia and P{\'e}rez-Tom{\'a}s, M Elena and Perucho, Teresa and Pinsach-Abuin, Mel Lina and Pompa-Mera, Ericka N and Porras-Hurtado, Gloria L and Pujol, Aurora and Le{\'o}n, Soraya Ramiro and Resino, Salvador and Fernandes, Marianne R and Rodr{\'\i}guez-Ruiz, Emilio and Rodriguez-Artalejo, Fernando and Rodriguez-Garcia, Jos{\'e} A and Ruiz-Cabello, Francisco and Ruiz-Hornillos, Javier and Ryan, Pablo and Soria, Jos{\'e} Manuel and Souto, Juan Carlos and Tamayo, Eduardo and Tamayo-Velasco, Alvaro and Taracido-Fernandez, Juan Carlos and Teper, Alejandro and Torres-Tobar, Lilian and Urioste, Miguel and Valencia-Ramos, Juan and Y{\'a}{\~n}ez, Zuleima and Zarate, Ruth and Nakanishi, Tomoko and Pigazzini, Sara and Degenhardt, Frauke and Butler-Laporte, Guillaume and Maya-Miles, Douglas and Bujanda, Luis and Bouysran, Youssef and Palom, Adriana and Ellinghaus, David and Mart{\'\i}nez-Bueno, Manuel and Rolker, Selina and Amitrano, Sara and Roade, Luisa and Fava, Francesca and Spinner, Christoph D and Prati, Daniele and Bernardo, David and Garc{\'\i}a, Federico and Darcis, Gilles and Fern{\'a}ndez-Cadenas, Israel and Holter, Jan Cato and Banales, Jesus M and Frithiof, Robert and Duga, Stefano and Asselta, Rosanna and Pereira, Alexandre C and Romero-G{\'o}mez, Manuel and Nafr{\'\i}a-Jim{\'e}nez, Beatriz and Hov, Johannes R and Migeotte, Isabelle and Renieri, Alessandra and Planas, Anna M and Ludwig, Kerstin U and Buti, Maria and Rahmouni, Souad and Alarc{\'o}n-Riquelme, Marta E and Schulte, Eva C and Franke, Andre and Karlsen, Tom H and Valenti, Luca and Zeberg, Hugo and Richards, Brent and Ganna, Andrea and Boada, Merc{\`e} and Rojas, Itziar and 
Ruiz, Agust{\'\i}n and S{\'a}nchez, Pascual and Real, Luis Miguel and Guill{\'e}n-Navarro, Encarna and Ayuso, Carmen and Gonz{\'a}lez-Neira, Anna and Riancho, Jos{\'e} A and Rojas-Martinez, Augusto and Flores, Carlos and Lapunzina, Pablo and Carracedo, {\'A}ngel} } @article {736, title = {COVID19 Disease Map, a computational knowledge repository of virus-host interaction mechanisms.}, journal = {Mol Syst Biol}, volume = {17}, year = {2021}, month = {2021 10}, pages = {e10387}, abstract = {

We need to effectively combine the knowledge from surging literature with complex datasets to propose mechanistic models of SARS-CoV-2 infection, improving data interpretation and predicting key targets of intervention. Here, we describe a large-scale community effort to build an open access, interoperable and computable repository of COVID-19 molecular mechanisms. The COVID-19 Disease Map (C19DMap) is a graphical, interactive representation of disease-relevant molecular mechanisms linking many knowledge sources. Notably, it is a computational resource for graph-based analyses and disease modelling. To this end, we established a framework of tools, platforms and guidelines necessary for a multifaceted community of biocurators, domain experts, bioinformaticians and computational biologists. The diagrams of the C19DMap, curated from the literature, are integrated with relevant interaction and text mining databases. We demonstrate the application of network analysis and modelling approaches by concrete examples to highlight new testable hypotheses. This framework helps to find signatures of SARS-CoV-2 predisposition, treatment response or prioritisation of drug candidates. Such an approach may help deal with new waves of COVID-19 or similar pandemics in the long-term perspective.

}, keywords = {Antiviral Agents, Computational Biology, Computer Graphics, COVID-19, Cytokines, Data Mining, Databases, Factual, Gene Expression Regulation, Host Microbial Interactions, Humans, Immunity, Cellular, Immunity, Humoral, Immunity, Innate, Lymphocytes, Metabolic Networks and Pathways, Myeloid Cells, Protein Interaction Mapping, SARS-CoV-2, Signal Transduction, Software, Transcription Factors, Viral Proteins}, issn = {1744-4292}, doi = {10.15252/msb.202110387}, author = {Ostaszewski, Marek and Niarakis, Anna and Mazein, Alexander and Kuperstein, Inna and Phair, Robert and Orta-Resendiz, Aurelio and Singh, Vidisha and Aghamiri, Sara Sadat and Acencio, Marcio Luis and Glaab, Enrico and Ruepp, Andreas and Fobo, Gisela and Montrone, Corinna and Brauner, Barbara and Frishman, Goar and Monraz G{\'o}mez, Luis Crist{\'o}bal and Somers, Julia and Hoch, Matti and Kumar Gupta, Shailendra and Scheel, Julia and Borlinghaus, Hanna and Czauderna, Tobias and Schreiber, Falk and Montagud, Arnau and Ponce de Leon, Miguel and Funahashi, Akira and Hiki, Yusuke and Hiroi, Noriko and Yamada, Takahiro G and Dr{\"a}ger, Andreas and Renz, Alina and Naveez, Muhammad and Bocskei, Zsolt and Messina, Francesco and B{\"o}rnigen, Daniela and Fergusson, Liam and Conti, Marta and Rameil, Marius and Nakonecnij, Vanessa and Vanhoefer, Jakob and Schmiester, Leonard and Wang, Muying and Ackerman, Emily E and Shoemaker, Jason E and Zucker, Jeremy and Oxford, Kristie and Teuton, Jeremy and Kocakaya, Ebru and Summak, G{\"o}k{\c c}e Ya{\u g}mur and Hanspers, Kristina and Kutmon, Martina and Coort, Susan and Eijssen, Lars and Ehrhart, Friederike and Rex, Devasahayam Arokia Balaya and Slenter, Denise and Martens, Marvin and Pham, Nhung and Haw, Robin and Jassal, Bijay and Matthews, Lisa and Orlic-Milacic, Marija and Senff Ribeiro, Andrea and Rothfels, Karen and Shamovsky, Veronica and Stephan, Ralf and Sevilla, Cristoffer and Varusai, Thawfeek and Ravel, Jean-Marie and Fraser, Rupsha and 
Ortseifen, Vera and Marchesi, Silvia and Gawron, Piotr and Smula, Ewa and Heirendt, Laurent and Satagopam, Venkata and Wu, Guanming and Riutta, Anders and Golebiewski, Martin and Owen, Stuart and Goble, Carole and Hu, Xiaoming and Overall, Rupert W and Maier, Dieter and Bauch, Angela and Gyori, Benjamin M and Bachman, John A and Vega, Carlos and Grou{\`e}s, Valentin and Vazquez, Miguel and Porras, Pablo and Licata, Luana and Iannuccelli, Marta and Sacco, Francesca and Nesterova, Anastasia and Yuryev, Anton and de Waard, Anita and Turei, Denes and Luna, Augustin and Babur, Ozgun and Soliman, Sylvain and Valdeolivas, Alberto and Esteban-Medina, Marina and Pe{\~n}a-Chilet, Maria and Rian, Kinza and Helikar, Tom{\'a}{\v s} and Puniya, Bhanwar Lal and Modos, Dezso and Treveil, Agatha and Olbei, Marton and De Meulder, Bertrand and Ballereau, Stephane and Dugourd, Aur{\'e}lien and Naldi, Aur{\'e}lien and No{\"e}l, Vincent and Calzone, Laurence and Sander, Chris and Demir, Emek and Korcsmaros, Tamas and Freeman, Tom C and Aug{\'e}, Franck and Beckmann, Jacques S and Hasenauer, Jan and Wolkenhauer, Olaf and Willighagen, Egon L and Pico, Alexander R and Evelo, Chris T and Gillespie, Marc E and Stein, Lincoln D and Hermjakob, Henning and D{\textquoteright}Eustachio, Peter and Saez-Rodriguez, Julio and Dopazo, Joaquin and Valencia, Alfonso and Kitano, Hiroaki and Barillot, Emmanuel and Auffray, Charles and Balling, Rudi and Schneider, Reinhard} } @article {720, title = {A DNA damage repair gene-associated signature predicts responses of patients with advanced soft-tissue sarcoma to treatment with trabectedin.}, journal = {Mol Oncol}, volume = {15}, year = {2021}, month = {2021 12}, pages = {3691--3705}, abstract = {

Predictive biomarkers of trabectedin represent an unmet need in advanced soft-tissue sarcomas (STS). DNA damage repair (DDR) genes, involved in homologous recombination or nucleotide excision repair, had been previously described as biomarkers of trabectedin resistance or sensitivity, respectively. The majority of these studies only focused on specific factors (ERCC1, ERCC5, and BRCA1) and did not evaluate several other DDR-related genes that could have a relevant role for trabectedin efficacy. In this retrospective translational study, 118 genes involved in DDR were evaluated to determine, by transcriptomics, a predictive gene signature of trabectedin efficacy. A six-gene predictive signature of trabectedin efficacy was built in a series of 139 tumor samples from patients with advanced STS. Patients in the high-risk gene signature group showed a significantly worse progression-free survival compared with patients in the low-risk group (2.1 vs 6.0 months, respectively). Differential gene expression analysis defined new potential predictive biomarkers of trabectedin sensitivity (PARP3 and CCNH) or resistance (DNAJB11 and PARP1). Our study identified a new gene signature that significantly predicts patients with higher probability to respond to treatment with trabectedin. Targeting some genes of this signature emerges as a potential strategy to enhance trabectedin efficacy.

}, issn = {1878-0261}, doi = {10.1002/1878-0261.12996}, author = {Moura, David S and Pe{\~n}a-Chilet, Maria and Cordero Varela, Juan Antonio and Alvarez-Alegret, Ramiro and Agra-Pujol, Carolina and Izquierdo, Francisco and Ramos, Rafael and Ortega-Medina, Luis and Martin-Davila, Francisco and Castilla-Ramirez, Carolina and Hernandez-Leon, Carmen Nieves and Romagosa, Cleofe and Vaz Salgado, Maria Angeles and Lavernia, Javier and Bagu{\'e}, Silvia and Mayodormo-Aranda, Empar and Vicioso, Luis and Hern{\'a}ndez Barcel{\'o}, Jose Emilio and Rubio-Casadevall, Jordi and de Juan, Ana and Fia{\~n}o-Valverde, Maria Concepcion and Hindi, Nadia and Lopez-Alvarez, Maria and Lacerenza, Serena and Dopazo, Joaquin and Gutierrez, Antonio and Alvarez, Rosa and Valverde, Claudia and Martinez-Trufero, Javier and Martin-Broto, Javier} } @article {728, title = {DOME: recommendations for supervised machine learning validation in biology.}, journal = {Nat Methods}, volume = {18}, year = {2021}, month = {2021 10}, pages = {1122--1127}, keywords = {Algorithms, Computational Biology, Guidelines as Topic, Humans, Models, Biological, Research Design, Supervised Machine Learning}, issn = {1548-7105}, doi = {10.1038/s41592-021-01205-4}, author = {Walsh, Ian and Fishman, Dmytro and Garcia-Gasulla, Dario and Titma, Tiina and Pollastri, Gianluca and Harrow, Jennifer and Psomopoulos, Fotis E and Tosatto, Silvio C E} } @article {724, title = {Genome-scale mechanistic modeling of signaling pathways made easy: A bioconductor/cytoscape/web server framework for the analysis of omic data}, journal = {Computational and Structural Biotechnology Journal}, volume = {19}, year = {2021}, month = {Jan-01-2021}, pages = {2968--2978}, issn = {2001-0370}, doi = {10.1016/j.csbj.2021.05.022}, url = {https://linkinghub.elsevier.com/retrieve/pii/S2001037021002038}, author = {Rian, Kinza and Hidalgo, Marta R. and Cubuk, Cankut and Falco, Matias M. 
and Loucera, Carlos and Esteban-Medina, Marina and Alamo-Alvarez, Inmaculada and Pe{\~n}a-Chilet, Maria and Dopazo, Joaquin} } @article {711, title = {Mechanistic modeling of the SARS-CoV-2 disease map.}, journal = {BioData Min}, volume = {14}, year = {2021}, month = {2021 Jan 21}, pages = {5}, abstract = {

Here we present a web interface that implements a comprehensive mechanistic model of the SARS-CoV-2 disease map. In this framework, the detailed activity of the human signaling circuits related to the viral infection, ranging from the entry and replication mechanisms to downstream consequences such as inflammation and antigenic response, can be inferred from gene expression experiments. Moreover, the effect of potential interventions, such as knock-downs, or drug effects (currently the system models the effect of more than 8000 DrugBank drugs) can be studied. This freely available tool not only provides an unprecedentedly detailed view of the mechanisms of viral invasion and the consequences in the cell but also has the potential of becoming an invaluable asset in the search for efficient antiviral treatments.

}, issn = {1756-0381}, doi = {10.1186/s13040-021-00234-1}, author = {Rian, Kinza and Esteban-Medina, Marina and Hidalgo, Marta R and Cubuk, Cankut and Falco, Matias M and Loucera, Carlos and Gunyel, Devrim and Ostaszewski, Marek and Pe{\~n}a-Chilet, Maria and Dopazo, Joaquin} } @article {714, title = {The NCI Genomic Data Commons}, journal = {Nature Genetics}, year = {2021}, month = {Oct-02-2022}, issn = {1061-4036}, doi = {10.1038/s41588-021-00791-5}, url = {http://www.nature.com/articles/s41588-021-00791-5}, author = {Heath, Allison P. and Ferretti, Vincent and Agrawal, Stuti and An, Maksim and Angelakos, James C. and Arya, Renuka and Bajari, Rosita and Baqar, Bilal and Barnowski, Justin H. B. and Burt, Jeffrey and Catton, Ann and Chan, Brandon F. and Chu, Fay and Cullion, Kim and Davidsen, Tanja and Do, Phuong-My and Dompierre, Christian and Ferguson, Martin L. and Fitzsimons, Michael S. and Ford, Michael and Fukuma, Miyuki and Gaheen, Sharon and Ganji, Gajanan L. and Garcia, Tzintzuni I. and George, Sameera S. and Gerhard, Daniela S. and Gerthoffert, Francois and Gomez, Fauzi and Han, Kang and Hernandez, Kyle M. and Issac, Biju and Jackson, Richard and Jensen, Mark A. and Joshi, Sid and Kadam, Ajinkya and Khurana, Aishmit and Kim, Kyle M. J. and Kraft, Victoria E. and Li, Shenglai and Lichtenberg, Tara M. and Lodato, Janice and Lolla, Laxmi and Martinov, Plamen and Mazzone, Jeffrey A. and Miller, Daniel P. and Miller, Ian and Miller, Joshua S. and Miyauchi, Koji and Murphy, Mark W. and Nullet, Thomas and Ogwara, Rowland O. and Ortu{\~n}o, Francisco M. and Pedrosa, Jes{\'u}s and Pham, Phuong L. and Popov, Maxim Y. and Porter, James J. and Powell, Raymond and Rademacher, Karl and Reid, Colin P. and Rich, Samantha and Rogel, Bessie and Sahni, Himanso and Savage, Jeremiah H. and Schmitt, Kyle A. and Simmons, Trevar J. and Sislow, Joseph and Spring, Jonathan and Stein, Lincoln and Sullivan, Sean and Tang, Yajing and Thiagarajan, Mathangi and Troyer, Heather D. 
and Wang, Chang and Wang, Zhining and West, Bedford L. and Wilmer, Alex and Wilson, Shane and Wu, Kaman and Wysocki, William P. and Xiang, Linda and Yamada, Joseph T. and Yang, Liming and Yu, Christine and Yung, Christina K. and Zenklusen, Jean Claude and Zhang, Junjun and Zhang, Zhenyu and Zhao, Yuanheng and Zubair, Ariz and Staudt, Louis M. and Grossman, Robert L.} } @article {734, title = {Orchestrating and sharing large multimodal data for transparent and reproducible research.}, journal = {Nat Commun}, volume = {12}, year = {2021}, month = {2021 10 04}, pages = {5797}, abstract = {

Reproducibility is essential to open science, as there is limited relevance for findings that can not be reproduced by independent research groups, regardless of its validity. It is therefore crucial for scientists to describe their experiments in sufficient detail so they can be reproduced, scrutinized, challenged, and built upon. However, the intrinsic complexity and continuous growth of biomedical data makes it increasingly difficult to process, analyze, and share with the community in a FAIR (findable, accessible, interoperable, and reusable) manner. To overcome these issues, we created a cloud-based platform called ORCESTRA (orcestra.ca), which provides a flexible framework for the reproducible processing of multimodal biomedical data. It enables processing of clinical, genomic and perturbation profiles of cancer samples through automated processing pipelines that are user-customizable. ORCESTRA creates integrated and fully documented data objects with persistent identifiers (DOI) and manages multiple dataset versions, which can be shared for future studies.

}, issn = {2041-1723}, doi = {10.1038/s41467-021-25974-w}, author = {Mammoliti, Anthony and Smirnov, Petr and Nakano, Minoru and Safikhani, Zhaleh and Eeles, Christopher and Seo, Heewon and Nair, Sisira Kadambat and Mer, Arvind S and Smith, Ian and Ho, Chantal and Beri, Gangesh and Kusko, Rebecca and Lin, Eva and Yu, Yihong and Martin, Scott and Hafner, Marc and Haibe-Kains, Benjamin} } @article {742, title = {Reporting guidelines for human microbiome research: the STORMS checklist.}, journal = {Nat Med}, volume = {27}, year = {2021}, month = {2021 11}, pages = {1885--1892}, abstract = {

The particularly interdisciplinary nature of human microbiome research makes the organization and reporting of results spanning epidemiology, biology, bioinformatics, translational medicine and statistics a challenge. Commonly used reporting guidelines for observational or genetic epidemiology studies lack key features specific to microbiome studies. Therefore, a multidisciplinary group of microbiome epidemiology researchers adapted guidelines for observational and genetic studies to culture-independent human microbiome studies, and also developed new reporting elements for laboratory, bioinformatics and statistical analyses tailored to microbiome studies. The resulting tool, called {\textquoteright}Strengthening The Organization and Reporting of Microbiome Studies{\textquoteright} (STORMS), is composed of a 17-item checklist organized into six sections that correspond to the typical sections of a scientific publication, presented as an editable table for inclusion in supplementary materials. The STORMS checklist provides guidance for concise and complete reporting of microbiome studies that will facilitate manuscript preparation, peer review, and reader comprehension of publications and comparative analysis of published results.

}, keywords = {Computational Biology, Dysbiosis, Humans, Microbiota, Observational Studies as Topic, Research Design, Translational Science, Biomedical}, issn = {1546-170X}, doi = {10.1038/s41591-021-01552-x}, author = {Mirzayi, Chloe and Renson, Audrey and Zohra, Fatima and Elsafoury, Shaimaa and Geistlinger, Ludwig and Kasselman, Lora J and Eckenrode, Kelly and van de Wijgert, Janneke and Loughman, Amy and Marques, Francine Z and MacIntyre, David A and Arumugam, Manimozhiyan and Azhar, Rimsha and Beghini, Francesco and Bergstrom, Kirk and Bhatt, Ami and Bisanz, Jordan E and Braun, Jonathan and Bravo, Hector Corrada and Buck, Gregory A and Bushman, Frederic and Casero, David and Clarke, Gerard and Collado, Maria Carmen and Cotter, Paul D and Cryan, John F and Demmer, Ryan T and Devkota, Suzanne and Elinav, Eran and Escobar, Juan S and Fettweis, Jennifer and Finn, Robert D and Fodor, Anthony A and Forslund, Sofia and Franke, Andre and Furlanello, Cesare and Gilbert, Jack and Grice, Elizabeth and Haibe-Kains, Benjamin and Handley, Scott and Herd, Pamela and Holmes, Susan and Jacobs, Jonathan P and Karstens, Lisa and Knight, Rob and Knights, Dan and Koren, Omry and Kwon, Douglas S and Langille, Morgan and Lindsay, Brianna and McGovern, Dermot and McHardy, Alice C and McWeeney, Shannon and Mueller, Noel T and Nezi, Luigi and Olm, Matthew and Palm, Noah and Pasolli, Edoardo and Raes, Jeroen and Redinbo, Matthew R and R{\"u}hlemann, Malte and Balfour Sartor, R and Schloss, Patrick D and Schriml, Lynn and Segal, Eran and Shardell, Michelle and Sharpton, Thomas and Smirnova, Ekaterina and Sokol, Harry and Sonnenburg, Justin L and Srinivasan, Sujatha and Thingholm, Louise B and Turnbaugh, Peter J and Upadhyay, Vaibhav and Walls, Ramona L and Wilmes, Paul and Yamada, Takuji and Zeller, Georg and Zhang, Mingyu and Zhao, Ni and Zhao, Liping and Bao, Wenjun and Culhane, Aedin and Devanarayan, Viswanath and Dopazo, Joaquin and Fan, Xiaohui and Fischer, Matthias and Jones, 
Wendell and Kusko, Rebecca and Mason, Christopher E and Mercer, Tim R and Sansone, Susanna-Assunta and Scherer, Andreas and Shi, Leming and Thakkar, Shraddha and Tong, Weida and Wolfinger, Russ and Hunter, Christopher and Segata, Nicola and Huttenhower, Curtis and Dowd, Jennifer B and Jones, Heidi E and Waldron, Levi} } @article {715, title = {Uniform genomic data analysis in the NCI Genomic Data Commons}, journal = {Nature Communications}, volume = {12}, year = {2021}, month = {Jan-12-2021}, doi = {10.1038/s41467-021-21254-9}, url = {http://www.nature.com/articles/s41467-021-21254-9}, author = {Zhang, Zhenyu and Hernandez, Kyle and Savage, Jeremiah and Li, Shenglai and Miller, Dan and Agrawal, Stuti and Ortuno, Francisco and Staudt, Louis M. and Heath, Allison and Grossman, Robert L.} } @article {696, title = {Community Assessment of the Predictability of Cancer Protein and Phosphoprotein Levels from Genomics and Transcriptomics.}, journal = {Cell Syst}, volume = {11}, year = {2020}, month = {2020 08 26}, pages = {186--195.e9}, abstract = {

Cancer is driven by genomic alterations, but the processes causing this disease are largely performed by proteins. However, proteins are harder and more expensive to measure than genes and transcripts. To catalyze developments of methods to infer protein levels from other omics measurements, we leveraged crowdsourcing via the NCI-CPTAC DREAM proteogenomic challenge. We asked for methods to predict protein and phosphorylation levels from genomic and transcriptomic data in cancer patients. The best performance was achieved by an ensemble of models, including as predictors transcript level of the corresponding genes, interaction between genes, conservation across tumor types, and phosphosite proximity for phosphorylation prediction. Proteins from metabolic pathways and complexes were the best and worst predicted, respectively. The performance of even the best-performing model was modest, suggesting that many proteins are strongly regulated through translational control and degradation. Our results set a reference for the limitations of computational inference in proteogenomics. A record of this paper{\textquoteright}s transparent peer review process is included in the Supplemental Information.

}, keywords = {Crowdsourcing, Female, Genomics, Humans, Machine Learning, Male, Neoplasms, Phosphoproteins, Proteins, Proteomics, Transcriptome}, issn = {2405-4720}, doi = {10.1016/j.cels.2020.06.013}, author = {Yang, Mi and Petralia, Francesca and Li, Zhi and Li, Hongyang and Ma, Weiping and Song, Xiaoyu and Kim, Sunkyu and Lee, Heewon and Yu, Han and Lee, Bora and Bae, Seohui and Heo, Eunji and Kaczmarczyk, Jan and St{\k e}pniak, Piotr and Warcho{\l}, Micha{\l} and Yu, Thomas and Calinawan, Anna P and Boutros, Paul C and Payne, Samuel H and Reva, Boris and Boja, Emily and Rodriguez, Henry and Stolovitzky, Gustavo and Guan, Yuanfang and Kang, Jaewoo and Wang, Pei and Feny{\"o}, David and Saez-Rodriguez, Julio} } @article {689, title = {COVID-19 Disease Map, building a computational repository of SARS-CoV-2 virus-host interaction mechanisms.}, journal = {Sci Data}, volume = {7}, year = {2020}, month = {2020 05 05}, pages = {136}, keywords = {Betacoronavirus, Computational Biology, Coronavirus Infections, COVID-19, Databases, Factual, Host Microbial Interactions, Host-Pathogen Interactions, Humans, International Cooperation, Models, Biological, Pandemics, Pneumonia, Viral, SARS-CoV-2}, issn = {2052-4463}, doi = {10.1038/s41597-020-0477-8}, author = {Ostaszewski, Marek and Mazein, Alexander and Gillespie, Marc E and Kuperstein, Inna and Niarakis, Anna and Hermjakob, Henning and Pico, Alexander R and Willighagen, Egon L and Evelo, Chris T and Hasenauer, Jan and Schreiber, Falk and Dr{\"a}ger, Andreas and Demir, Emek and Wolkenhauer, Olaf and Furlong, Laura I and Barillot, Emmanuel and Dopazo, Joaquin and Orta-Resendiz, Aurelio and Messina, Francesco and Valencia, Alfonso and Funahashi, Akira and Kitano, Hiroaki and Auffray, Charles and Balling, Rudi and Schneider, Reinhard} } @article {729, title = {The ELIXIR Human Copy Number Variations Community: building bioinformatics infrastructure for research.}, journal = {F1000Res}, volume = {9}, year = {2020}, month = 
{2020}, pages = {1229}, abstract = {

Copy number variations (CNVs) are major causative contributors both in the genesis of genetic diseases and human neoplasias. While "High-Throughput" sequencing technologies are increasingly becoming the primary choice for genomic screening analysis, their ability to efficiently detect CNVs is still heterogeneous and remains to be developed. The aim of this white paper is to provide a guiding framework for the future contributions of ELIXIR{\textquoteright}s recently established human CNV Community, with implications beyond human disease diagnostics and population genomics. This white paper is the direct result of a strategy meeting that took place in September 2018 in Hinxton (UK) and involved representatives of 11 ELIXIR Nodes. The meeting led to the definition of priority objectives and tasks, to address a wide range of CNV-related challenges ranging from detection and interpretation to sharing and training. Here, we provide suggestions on how to align these tasks within the ELIXIR Platforms strategy, and on how to frame the activities of this new ELIXIR Community in the international context.

}, keywords = {Computational Biology, DNA Copy Number Variations, High-Throughput Nucleotide Sequencing, Humans}, issn = {2046-1402}, doi = {10.12688/f1000research.24887.1}, author = {Salgado, David and Armean, Irina M and Baudis, Michael and Beltran, Sergi and Capella-Gut{\'\i}errez, Salvador and Carvalho-Silva, Denise and Dominguez Del Angel, Victoria and Dopazo, Joaquin and Furlong, Laura I and Gao, Bo and Garcia, Leyla and Gerloff, Dietlind and Gut, Ivo and Gyenesei, Attila and Habermann, Nina and Hancock, John M and Hanauer, Marc and Hovig, Eivind and Johansson, Lennart F and Keane, Thomas and Korbel, Jan and Lauer, Katharina B and Laurie, Steve and Lesko{\v s}ek, Brane and Lloyd, David and Marqu{\'e}s-Bonet, Tom{\'a}s and Mei, Hailiang and Monostory, Katalin and Pi{\~n}ero, Janet and Poterlowicz, Krzysztof and Rath, Ana and Samarakoon, Pubudu and Sanz, Ferran and Saunders, Gary and Sie, Daoud and Swertz, Morris A and Tsukanov, Kirill and Valencia, Alfonso and Vidak, Marko and Yenyxe Gonz{\'a}lez, Cristina and Ylstra, Bauke and B{\'e}roud, Christophe} } @article {693, title = {Immune Cell Associations with Cancer Risk.}, journal = {iScience}, volume = {23}, year = {2020}, month = {2020 Jul 24}, pages = {101296}, abstract = {

Proper immune system function hinders cancer development, but little is known about whether genetic variants linked to cancer risk alter immune cells. Here, we report 57 cancer risk loci associated with differences in immune and/or stromal cell contents in the corresponding tissue. Predicted target genes show expression and regulatory associations with immune features. Polygenic risk scores also reveal associations with immune and/or stromal cell contents, and breast cancer scores show consistent results in normal and tumor tissue. SH2B3 links peripheral alterations of several immune cell types to the risk of this malignancy. Pleiotropic SH2B3 variants are associated with breast cancer risk in BRCA1/2 mutation carriers. A retrospective case-cohort study indicates a positive association between blood counts of basophils, leukocytes, and monocytes and age at breast cancer diagnosis. These findings broaden our knowledge of the role of the immune system in cancer and highlight promising prevention strategies for individuals at high risk.

}, issn = {2589-0042}, doi = {10.1016/j.isci.2020.101296}, author = {Palomero, Luis and Galv{\'a}n-Femen{\'\i}a, Ivan and de Cid, Rafael and Esp{\'\i}n, Roderic and Barnes, Daniel R and Blommaert, Eline and Gil-Gil, Miguel and Falo, Catalina and Stradella, Agostina and Ouchi, Dan and Roso-Llorach, Albert and Violan, Concepci{\'o} and Pe{\~n}a-Chilet, Maria and Dopazo, Joaquin and Extremera, Ana Isabel and Garc{\'\i}a-Valero, Mar and Herranz, Carmen and Mateo, Francesca and Mereu, Elisabetta and Beesley, Jonathan and Chenevix-Trench, Georgia and Roux, Cecilia and Mak, Tak and Brunet, Joan and Hakem, Razq and Gorrini, Chiara and Antoniou, Antonis C and L{\'a}zaro, Conxi and Pujana, Miquel Angel} } @article {702, title = {Mechanistic models of signaling pathways deconvolute the glioblastoma single-cell functional landscape}, journal = {NAR Cancer}, volume = {2}, year = {2020}, month = {Jan-06-2020}, doi = {10.1093/narcan/zcaa011}, url = {https://academic.oup.com/narcancer/article/doi/10.1093/narcan/zcaa011/5862620}, author = {Falco, Matias M and Pe{\~n}a-Chilet, Maria and Loucera, Carlos and Hidalgo, Marta R and Dopazo, Joaquin} } @article {707, title = {Nivolumab and sunitinib combination in advanced soft tissue sarcomas: a multicenter, single-arm, phase Ib/II trial.}, journal = {J Immunother Cancer}, volume = {8}, year = {2020}, month = {2020 11}, abstract = {

BACKGROUND: Sarcomas exhibit low expression of factors related to immune response, which could explain the modest activity of PD-1 inhibitors. A potential strategy to convert a cold into an inflamed microenvironment lies on a combination therapy. As tumor angiogenesis promotes immunosuppression, we designed a phase Ib/II trial to test the double inhibition of angiogenesis (sunitinib) and PD-1/PD-L1 axis (nivolumab).

METHODS: This single-arm, phase Ib/II trial enrolled adult patients with selected subtypes of sarcoma. Phase Ib established two dose levels: level 0 with sunitinib 37.5 mg daily from day 1, plus nivolumab 3 mg/kg intravenously on day 15, and then every 2 weeks; and level -1 with sunitinib 37.5 mg on the first 14 days (induction) and then 25 mg per day plus nivolumab on the same schedule. The primary endpoint was to determine the recommended dose for phase II (phase I) and the 6-month progression-free survival rate, according to Response Evaluation Criteria in Solid Tumors 1.1 (phase II).

RESULTS: From May 2017 to April 2019, 68 patients were enrolled: 16 in phase Ib and 52 in phase II. The recommended dose of sunitinib for phase II was 37.5 mg as induction and then 25 mg in combination with nivolumab. After a median follow-up of 17 months (4-26), the 6-month progression-free survival rate was 48\% (95\% CI 41\% to 55\%). The most common grade 3-4 adverse events included transaminitis (17.3\%) and neutropenia (11.5\%).

CONCLUSIONS: Sunitinib plus nivolumab is an active scheme with manageable toxicity in the treatment of selected patients with advanced soft tissue sarcoma, with almost half of patients free of progression at 6 months. NCT03277924.

}, keywords = {Adult, Aged, Antineoplastic Agents, Immunological, Female, Humans, Male, Middle Aged, Nivolumab, Sarcoma, Sunitinib, Young Adult}, issn = {2051-1426}, doi = {10.1136/jitc-2020-001561}, author = {Martin-Broto, Javier and Hindi, Nadia and Grignani, Giovanni and Martinez-Trufero, Javier and Redondo, Andres and Valverde, Claudia and Stacchiotti, Silvia and Lopez-Pousa, Antonio and D{\textquoteright}Ambrosio, Lorenzo and Gutierrez, Antonio and Perez-Vega, Herminia and Encinas-Tobajas, Victor and de Alava, Enrique and Collini, Paola and Pe{\~n}a-Chilet, Maria and Dopazo, Joaquin and Carrasco-Garcia, Irene and Lopez-Alvarez, Maria and Moura, David S and Lopez-Martin, Jose A} } @article {665, title = {Optimised molecular genetic diagnostics of Fanconi anaemia by whole exome sequencing and functional studies.}, journal = {J Med Genet}, volume = {57}, year = {2020}, month = {2020 04}, pages = {258--268}, abstract = {

PURPOSE: Patients with Fanconi anaemia (FA), a rare DNA repair genetic disease, exhibit chromosome fragility, bone marrow failure, malformations and cancer susceptibility. FA molecular diagnosis is challenging since FA is caused by point mutations and large deletions in 22 genes following three heritability patterns. To optimise FA patients{\textquoteright} characterisation, we developed a simplified but effective methodology based on whole exome sequencing (WES) and functional studies.

METHODS: 68 patients with FA were analysed by commercial WES services. Copy number variations were evaluated by sequencing data analysis with RStudio. To test missense variants, wt FANCA cDNA was cloned and variants were introduced by site-directed mutagenesis. Vectors were then tested for their ability to complement DNA repair defects of a FANCA-KO human cell line generated by TALEN technologies.

RESULTS: We identified 93.3\% of mutated alleles including large deletions. We determined the pathogenicity of three FANCA missense variants and demonstrated that two variants reported in mutations databases as {\textquoteright}affecting functions{\textquoteright} are SNPs. Deep analysis of sequencing data revealed patients{\textquoteright} true mutations, highlighting the importance of functional analysis. In one patient, no pathogenic variant could be identified in any of the 22 known FA genes, and in seven patients, only one deleterious variant could be identified (three patients each with FANCA and FANCD2 and one patient with FANCE mutations).

CONCLUSION: WES and proper bioinformatics analysis are sufficient to effectively characterise patients with FA regardless of the rarity of their complementation group, type of mutations, mosaic condition and DNA source.

}, keywords = {Cell Line, DNA Copy Number Variations, DNA Repair, DNA-Binding Proteins, Fanconi Anemia, Fanconi Anemia Complementation Group A Protein, Female, Gene Knockout Techniques, Genetic Predisposition to Disease, Humans, Male, Mutation, Missense, Polymorphism, Single Nucleotide, whole exome sequencing}, issn = {1468-6244}, doi = {10.1136/jmedgenet-2019-106249}, author = {Bogliolo, Massimo and Pujol, Roser and Aza-Carmona, Miriam and Mu{\~n}oz-Subirana, N{\'u}ria and Rodriguez-Santiago, Benjamin and Casado, Jos{\'e} Antonio and Rio, Paula and Bauser, Christopher and Reina-Castill{\'o}n, Judith and Lopez-Sanchez, Marcos and Gonzalez-Quereda, Lidia and Gallano, Pia and Catal{\'a}, Albert and Ruiz-Llobet, Ana and Badell, Isabel and Diaz-Heredia, Cristina and Hladun, Raquel and Senent, Leonort and Argiles, Bienvenida and Bergua Burgues, Juan Miguel and Ba{\~n}ez, Fatima and Arrizabalaga, Beatriz and L{\'o}pez Almaraz, Ricardo and Lopez, Monica and Figuera, {\'A}ngela and Molin{\'e}s, Antonio and P{\'e}rez de Soto, Inmaculada and Hernando, In{\'e}s and Mu{\~n}oz, Juan Antonio and Del Rosario Marin, Maria and Balma{\~n}a, Judith and Stjepanovic, Neda and Carrasco, Estela and Cuesta, Isabel and Cosuelo, Jos{\'e} Miguel and Regueiro, Alexandra and Moraleda Jimenez, Jos{\'e} and Galera-Mi{\~n}arro, Ana Maria and Rosi{\~n}ol, Laura and Carri{\'o}, Anna and Bel{\'e}ndez-Bieler, Cristina and Escudero Soto, Antonio and Cela, Elena and de la Mata, Gregorio and Fern{\'a}ndez-Delgado, Rafael and Garcia-Pardos, Maria Carmen and S{\'a}ez-Villaverde, Raquel and Barraga{\~n}o, Marta and Portugal, Raquel and Lendinez, Francisco and Hernadez, Ines and Vagace, Jos{\'e} Manue and Tapia, Maria and Nieto, Jos{\'e} and Garcia, Marta and Gonzalez, Macarena and Vicho, Cristina and Galvez, Eva and Valiente, Alberto and Antelo, Maria Luisa and Ancliff, Phil and Garc{\'\i}a, Francisco and Dopazo, Joaquin and Sevilla, Julian and Paprotka, Tobias and P{\'e}rez-Jurado, Luis Alberto and 
Bueren, Juan and Surralles, Jordi} } @article {653, title = {Pazopanib for treatment of typical solitary fibrous tumours: a multicentre, single-arm, phase 2 trial.}, journal = {Lancet Oncol}, volume = {21}, year = {2020}, month = {2020 03}, pages = {456--466}, abstract = {

BACKGROUND: Solitary fibrous tumour is an ultra-rare sarcoma, which encompasses different clinicopathological subgroups. The dedifferentiated subgroup shows an aggressive course with resistance to pazopanib, whereas in the malignant subgroup, pazopanib shows higher activity than in previous studies with chemotherapy. We designed a trial to test pazopanib activity in two different cohorts of solitary fibrous tumour: the malignant-dedifferentiated cohort, which was previously published, and the typical cohort, which is presented here.

METHODS: In this single-arm, phase 2 trial, adult patients (aged $\geq$18 years) diagnosed with confirmed metastatic or unresectable typical solitary fibrous tumour of any location, who had progressed in the previous 6 months (by Choi criteria or Response Evaluation Criteria in Solid Tumors [RECIST]) and an Eastern Cooperative Oncology Group (ECOG) performance status of 0-2 were enrolled at 11 tertiary hospitals in Italy, France, and Spain. Patients received pazopanib 800 mg once daily, taken orally, until progression, unacceptable toxicity, withdrawal of consent, non-compliance, or a delay in pazopanib administration of longer than 3 weeks. The primary endpoint was proportion of patients achieving an overall response measured by Choi criteria in patients who received at least 1 month of treatment with at least one radiological assessment. All patients who received at least one dose of the study drug were included in the safety analyses. This study is registered in ClinicalTrials.gov, NCT02066285, and with the European Clinical Trials Database, EudraCT 2013-005456-15.

FINDINGS: From June 26, 2014, to Dec 13, 2018, of 40 patients who were assessed, 34 patients were enrolled and 31 patients were included in the response analysis. Median follow-up was 18 months (IQR 14-34), and 18 (58\%) of 31 patients had a partial response, 12 (39\%) had stable disease, and one (3\%) showed progressive disease according to Choi criteria and central review. The proportion of overall response based on Choi criteria was 58\% (95\% CI 34-69). There were no deaths caused by toxicity, and the most frequent adverse events were diarrhoea (18 [53\%] of 34 patients), fatigue (17 [50\%]), and hypertension (17 [50\%]).

INTERPRETATION: To our knowledge, this is the first prospective trial of pazopanib for advanced typical solitary fibrous tumour. The manageable toxicity and activity shown by pazopanib in this cohort suggest that this drug could be considered as first-line treatment for advanced typical solitary fibrous tumour.

FUNDING: Spanish Group for Research on Sarcomas (GEIS), Italian Sarcoma Group (ISG), French Sarcoma Group (FSG), GlaxoSmithKline, and Novartis.

}, keywords = {Aged, Female, Follow-Up Studies, Humans, Indazoles, Male, Middle Aged, Neoplasm Metastasis, Prognosis, Prospective Studies, Protein Kinase Inhibitors, Pyrimidines, Response Evaluation Criteria in Solid Tumors, Solitary Fibrous Tumors, Sulfonamides, Survival Rate}, issn = {1474-5488}, doi = {10.1016/S1470-2045(19)30826-5}, author = {Martin-Broto, Javier and Cruz, Josefina and Penel, Nicolas and Le Cesne, Axel and Hindi, Nadia and Luna, Pablo and Moura, David S and Bernabeu, Daniel and de Alava, Enrique and Lopez-Guerrero, Jose Antonio and Dopazo, Joaquin and Pe{\~n}a-Chilet, Maria and Gutierrez, Antonio and Collini, Paola and Karanian, Marie and Redondo, Andres and Lopez-Pousa, Antonio and Grignani, Giovanni and Diaz-Martin, Juan and Marcilla, David and Fernandez-Serra, Antonio and Gonzalez-Aguilera, Cristina and Casali, Paolo G and Blay, Jean-Yves and Stacchiotti, Silvia} } @article {692, title = {Towards Improving Skin Cancer Diagnosis by Integrating Microarray and RNA-Seq Datasets.}, journal = {IEEE J Biomed Health Inform}, volume = {24}, year = {2020}, month = {2020 07}, pages = {2119--2130}, abstract = {

Many clinical studies have revealed the high biological similarities existing among different skin pathological states. These similarities create difficulties in the efficient diagnosis of skin cancer, and encourage to study and design new intelligent clinical decision support systems. In this sense, gene expression analysis can help find differentially expressed genes (DEGs) simultaneously discerning multiple skin pathological states in a single test. The integration of multiple heterogeneous transcriptomic datasets requires different pipeline stages to be properly designed: from suitable batch merging and efficient biomarker selection to automated classification assessment. This article presents a novel approach addressing all these technical issues, with the intention of providing new sights about skin cancer diagnosis. Although new future efforts will have to be made in the search for better biomarkers recognizing specific skin pathological states, our study found a panel of 8 highly relevant multiclass DEGs for discerning up to 10 skin pathological states: 2 healthy skin conditions a priori, 2 cataloged precancerous skin diseases and 6 cancerous skin states. Their power of diagnosis over new samples was widely tested by previously well-trained classification models. Robust performance metrics such as overall and mean multiclass F1-score outperformed recognition rates of 94\% and 80\%, respectively. Clinicians should give special attention to highlighted multiclass DEGs that have high gene expression changes present among them, and understand their biological relationship to different skin pathological states.

}, keywords = {Biomarkers, Tumor, Computational Biology, Diagnosis, Computer-Assisted, Gene Expression Profiling, Humans, Machine Learning, RNA-seq, Skin Neoplasms}, issn = {2168-2208}, doi = {10.1109/JBHI.2019.2953978}, author = {Galvez, Juan M and Castillo-Secilla, Daniel and Herrera, Luis J and Valenzuela, Olga and Caba, Octavio and Prados, Jose C and Ortuno, Francisco M and Rojas, Ignacio} } @article {704, title = {Transparency and reproducibility in artificial intelligence.}, journal = {Nature}, volume = {586}, year = {2020}, month = {2020 10}, pages = {E14--E16}, keywords = {Algorithms, Artificial Intelligence, Reproducibility of Results}, issn = {1476-4687}, doi = {10.1038/s41586-020-2766-y}, author = {Haibe-Kains, Benjamin and Adam, George Alexandru and Hosny, Ahmed and Khodakarami, Farnoosh and Waldron, Levi and Wang, Bo and McIntosh, Chris and Goldenberg, Anna and Kundaje, Anshul and Greene, Casey S and Broderick, Tamara and Hoffman, Michael M and Leek, Jeffrey T and Korthauer, Keegan and Huber, Wolfgang and Brazma, Alvis and Pineau, Joelle and Tibshirani, Robert and Hastie, Trevor and Ioannidis, John P A and Quackenbush, John and Aerts, Hugo J W L} } @article {403, title = {A comparison of mechanistic signaling pathway activity analysis methods.}, journal = {Brief Bioinform}, volume = {20}, year = {2019}, month = {2019 09 27}, pages = {1655--1668}, abstract = {

Understanding the aspects of cell functionality that account for disease mechanisms or drug modes of action is a main challenge for precision medicine. Classical gene-based approaches ignore the modular nature of most human traits, whereas conventional pathway enrichment approaches produce only illustrative results of limited practical utility. Recently, a family of new methods has emerged that change the focus from the whole pathways to the definition of elementary subpathways within them that have any mechanistic significance and to the study of their activities. Thus, mechanistic pathway activity (MPA) methods constitute a new paradigm that allows recoding poorly informative genomic measurements into cell activity quantitative values and relate them to phenotypes. Here we provide a review on the MPA methods available and explain their contribution to systems medicine approaches for addressing challenges in the diagnostic and treatment of complex diseases.

}, keywords = {Algorithms, Humans, Postmortem Changes, Signal Transduction, Systems biology, Transcriptome}, issn = {1477-4054}, doi = {10.1093/bib/bby040}, author = {Amadoz, Alicia and Hidalgo, Marta R and Cubuk, Cankut and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin} } @article {422, title = {Differential metabolic activity and discovery of therapeutic targets using summarized metabolic pathway models.}, journal = {NPJ Syst Biol Appl}, volume = {5}, year = {2019}, month = {2019}, pages = {7}, abstract = {

In spite of the increasing availability of genomic and transcriptomic data, there is still a gap between the detection of perturbations in gene expression and the understanding of their contribution to the molecular mechanisms that ultimately account for the phenotype studied. Alterations in the metabolism are behind the initiation and progression of many diseases, including cancer. The wealth of available knowledge on metabolic processes can therefore be used to derive mechanistic models that link gene expression perturbations to changes in metabolic activity that provide relevant clues on molecular mechanisms of disease and drug modes of action (MoA). In particular, pathway modules, which recapitulate the main aspects of metabolism, are especially suitable for this type of modeling. We present Metabolizer, a web-based application that offers an intuitive, easy-to-use interactive interface to analyze differences in pathway metabolic module activities that can also be used for class prediction and in silico prediction of knock-out (KO) effects. Moreover, Metabolizer can automatically predict the optimal KO intervention for restoring a diseased phenotype. We provide different types of validations of some of the predictions made by Metabolizer. Metabolizer is a web tool that allows understanding molecular mechanisms of disease or the MoA of drugs within the context of the metabolism by using gene expression measurements. In addition, this tool automatically suggests potential therapeutic targets for individualized therapeutic interventions.

}, keywords = {Computational Biology, Computer Simulation, Drug discovery, Gene Regulatory Networks, Humans, Internet, Metabolic Networks and Pathways, Models, Biological, Neoplasms, Phenotype, Software, Transcriptome}, issn = {2056-7189}, doi = {10.1038/s41540-019-0087-2}, author = {Cubuk, Cankut and Hidalgo, Marta R and Amadoz, Alicia and Rian, Kinza and Salavert, Francisco and Pujana, Miguel A and Mateo, Francesca and Herranz, Carmen and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin} } @article {423, title = {Pazopanib for treatment of advanced malignant and dedifferentiated solitary fibrous tumour: a multicentre, single-arm, phase 2 trial.}, journal = {Lancet Oncol}, volume = {20}, year = {2019}, month = {2019 01}, pages = {134--144}, abstract = {

BACKGROUND: A solitary fibrous tumour is a rare soft-tissue tumour with three clinicopathological variants: typical, malignant, and dedifferentiated. Preclinical experiments and retrospective studies have shown different sensitivities of solitary fibrous tumour to chemotherapy and antiangiogenics. We therefore designed a trial to assess the activity of pazopanib in a cohort of patients with malignant or dedifferentiated solitary fibrous tumour. The clinical and translational results are presented here.

METHODS: In this single-arm, phase 2 trial, adult patients (aged >= 18 years) with histologically confirmed metastatic or unresectable malignant or dedifferentiated solitary fibrous tumour at any location, who had progressed (by RECIST and Choi criteria) in the previous 6 months and had an ECOG performance status of 0-2, were enrolled at 16 third-level hospitals with expertise in sarcoma care in Spain, Italy, and France. Patients received pazopanib 800 mg once daily, taken orally without food, at least 1 h before or 2 h after a meal, until progression or intolerance. The primary endpoint of the study was overall response measured by Choi criteria in the subset of the intention-to-treat population (patients who received at least 1 month of treatment with at least one radiological assessment). All patients who received at least one dose of the study drug were included in the safety analyses. This study is registered with ClinicalTrials.gov, number NCT02066285, and with the European Clinical Trials Database, EudraCT number 2013-005456-15.

FINDINGS: From June 26, 2014, to Nov 24, 2016, of 40 patients assessed, 36 were enrolled (34 with malignant solitary fibrous tumour and two with dedifferentiated solitary fibrous tumour). Median follow-up was 27 months (IQR 16-31). Based on central radiology review, 18 (51\%) of 35 evaluable patients had partial responses, nine (26\%) had stable disease, and eight (23\%) had progressive disease according to Choi criteria. Further enrolment of patients with dedifferentiated solitary fibrous tumour was stopped after detection of early and fast progressions in a planned interim analysis. 51\% (95\% CI 34-69) of 35 patients achieved an overall response according to Choi criteria. Ten (29\%) of 35 patients died. There were no deaths related to adverse events and the most frequent grade 3 or higher adverse events were hypertension (11 [31\%] of 36 patients), neutropenia (four [11\%]), increased concentrations of alanine aminotransferase (four [11\%]), and increased concentrations of bilirubin (three [8\%]).

INTERPRETATION: To our knowledge, this is the first trial of pazopanib for treatment of malignant solitary fibrous tumour showing activity in this patient group. The manageable toxicity profile and the activity shown by pazopanib suggests that this drug could be an option for systemic treatment of advanced malignant solitary fibrous tumour, and provides a benchmark for future trials.

FUNDING: Spanish Group for Research on Sarcomas (GEIS), Italian Sarcoma Group (ISG), French Sarcoma Group (FSG), GlaxoSmithKline, and Novartis.

},
  keywords = {Adult, Aged, Angiogenesis Inhibitors, Antineoplastic Agents, Female, Humans, Indazoles, Male, Middle Aged, Multivariate Analysis, Pyrimidines, Response Evaluation Criteria in Solid Tumors, Soft Tissue Neoplasms, Solitary Fibrous Tumors, Sulfonamides, Survival Analysis},
  issn = {1474-5488},
  doi = {10.1016/S1470-2045(18)30676-4},
  author = {Martin-Broto, Javier and Stacchiotti, Silvia and Lopez-Pousa, Antonio and Redondo, Andres and Bernabeu, Daniel and de Alava, Enrique and Casali, Paolo G and Italiano, Antoine and Gutierrez, Antonio and Moura, David S and Pe{\~n}a-Chilet, Maria and Diaz-Martin, Juan and Biscuola, Michele and Taron, Miguel and Collini, Paola and Ranchere-Vince, Dominique and Garcia Del Muro, Xavier and Grignani, Giovanni and Dumont, Sarah and Martinez-Trufero, Javier and Palmerini, Emanuela and Hindi, Nadia and Sebio, Ana and Dopazo, Joaquin and Dei Tos, Angelo Paolo and LeCesne, Axel and Blay, Jean-Yves and Cruz, Josefina}
}

@article{664,
  title = {Using mechanistic models for the clinical interpretation of complex genomic variation},
  journal = {Scientific Reports},
  volume = {9},
  year = {2019},
  month = {Jan-12-2019},
  doi = {10.1038/s41598-019-55454-7},
  url = {http://www.nature.com/articles/s41598-019-55454-7},
  author = {Pe{\~n}a-Chilet, Maria and Esteban-Medina, Marina and Falco, Matias M. and Rian, Kinza and Hidalgo, Marta R. and Loucera, Carlos and Dopazo, Joaquin}
}

@article{428,
  title = {A crowdsourced analysis to identify ab initio molecular signatures predictive of susceptibility to viral infection},
  journal = {Nature Communications},
  volume = {9},
  year = {2018},
  month = {Jan-12-2018},
  doi = {10.1038/s41467-018-06735-8},
  url = {http://www.nature.com/articles/s41467-018-06735-8},
  author = {Fourati, Slim and Talla, Aarthi and Mahmoudian, Mehrad and Burkhart, Joshua G. and Kl{\'e}n, Riku and Henao, Ricardo and Yu, Thomas and Ayd{\i}n, Zafer and Yeung, Ka Yee and Ahsen, Mehmet Eren and Almugbel, Reem and Jahandideh, Samad and Liang, Xiao and Nordling, Torbj{\"o}rn E. M. and Shiga, Motoki and Stanescu, Ana and Vogel, Robert and Pandey, Gaurav and Chiu, Christopher and McClain, Micah T. and Woods, Christopher W. and Ginsburg, Geoffrey S. and Elo, Laura L. and Tsalik, Ephraim L. and Mangravite, Lara M. and Sieberts, Solveig K.}
}

@article{397,
  title = {The effects of death and post-mortem cold ischemia on human tissue transcriptomes.},
  journal = {Nat Commun},
  volume = {9},
  year = {2018},
  month = {2018 02 13},
  pages = {490},
  abstract = {

Post-mortem tissues samples are a key resource for investigating patterns of gene expression. However, the processes triggered by death and the post-mortem interval (PMI) can significantly alter physiologically normal RNA levels. We investigate the impact of PMI on gene expression using data from multiple tissues of post-mortem donors obtained from the GTEx project. We find that many genes change expression over relatively short PMIs in a tissue-specific manner, but this potentially confounding effect in a biological analysis can be minimized by taking into account appropriate covariates. By comparing ante- and post-mortem blood samples, we identify the cascade of transcriptional events triggered by death of the organism. These events do not appear to simply reflect stochastic variation resulting from mRNA degradation, but active and ongoing regulation of transcription. Finally, we develop a model to predict the time since death from the analysis of the transcriptome of a few readily accessible tissues.

},
  keywords = {Blood, Cold Ischemia, Death, Female, gene expression, Humans, Models, Biological, Postmortem Changes, RNA, Messenger, Stochastic Processes, Transcriptome},
  issn = {2041-1723},
  doi = {10.1038/s41467-017-02772-x},
  author = {Ferreira, Pedro G and Mu{\~n}oz-Aguirre, Manuel and Reverter, Ferran and S{\'a} Godinho, Caio P and Sousa, Abel and Amadoz, Alicia and Sodaei, Reza and Hidalgo, Marta R and Pervouchine, Dmitri and Carbonell-Caballero, Jos{\'e} and Nurtdinov, Ramil and Breschi, Alessandra and Amador, Raziel and Oliveira, Patr{\'\i}cia and Cubuk, Cankut and Curado, Jo{\~a}o and Aguet, Fran{\c c}ois and Oliveira, Carla and Dopazo, Joaquin and Sammeth, Michael and Ardlie, Kristin G and Guig{\'o}, Roderic}
}

@article{406,
  title = {The first complete genomic structure of {Butyrivibrio} fibrisolvens and its chromid.},
  journal = {Microb Genom},
  volume = {4},
  year = {2018},
  month = {2018 10},
  abstract = {

Butyrivibrio fibrisolvens forms part of the gastrointestinal microbiome of ruminants and other mammals, including humans. Indeed, it is one of the most common bacteria found in the rumen and plays an important role in ruminal fermentation of polysaccharides, yet, to date, there is no closed reference genome published for this species in any ruminant animal. We successfully assembled the nearly complete genome sequence of B. fibrisolvens strain INBov1 isolated from cow rumen using Illumina paired-end reads, 454 Roche single-end and mate pair sequencing technology. Additionally, we constructed an optical restriction map of this strain to aid in scaffold ordering and positioning, and completed the first genomic structure of this species. Moreover, we identified and assembled the first chromid of this species (pINBov266). The INBov1 genome encodes a large set of genes involved in the cellulolytic process but lacks key genes. This seems to indicate that B. fibrisolvens plays an important role in ruminal cellulolytic processes, but does not have autonomous cellulolytic capacity. When searching for genes involved in the biohydrogenation of unsaturated fatty acids, no linoleate isomerase gene was found in this strain. INBov1 does encode oleate hydratase genes known to participate in the hydrogenation of oleic acids. Furthermore, INBov1 contains an enolase gene, which has been recently determined to participate in the synthesis of conjugated linoleic acids. This work confirms the presence of a novel chromid in B. fibrisolvens and provides a new potential reference genome sequence for this species, providing new insight into its role in biohydrogenation and carbohydrate degradation.

},
  keywords = {Animals, Butyrivibrio fibrisolvens, Cattle, Genome, Bacterial, Genomics, Humans, Milk, Rumen, Sequence Analysis, DNA},
  issn = {2057-5858},
  doi = {10.1099/mgen.0.000216},
  author = {Rodr{\'\i}guez Hern{\'a}ez, Javier and Cer{\'o}n Cucchi, Maria Esperanza and Cravero, Silvio and Martinez, Maria Carolina and Gonzalez, Sergio and Puebla, Andrea and Dopazo, Joaquin and Farber, Marisa and Paniego, Norma and Rivarola, M{\'a}ximo}
}

@article{405,
  title = {Gene Expression Integration into Pathway Modules Reveals a Pan-Cancer Metabolic Landscape.},
  journal = {Cancer Res},
  volume = {78},
  year = {2018},
  month = {2018 11 01},
  pages = {6059--6072},
  abstract = {

Metabolic reprogramming plays an important role in cancer development and progression and is a well-established hallmark of cancer. Despite its inherent complexity, cellular metabolism can be decomposed into functional modules that represent fundamental metabolic processes. Here, we performed a pan-cancer study involving 9,428 samples from 25 cancer types to reveal metabolic modules whose individual or coordinated activity predict cancer type and outcome, in turn highlighting novel therapeutic opportunities. Integration of gene expression levels into metabolic modules suggests that the activity of specific modules differs between cancers and the corresponding tissues of origin. Some modules may cooperate, as indicated by the positive correlation of their activity across a range of tumors. The activity of many metabolic modules was significantly associated with prognosis at a stronger magnitude than any of their constituent genes. Thus, modules may be classified as tumor suppressors and oncomodules according to their potential impact on cancer progression. Using this modeling framework, we also propose novel potential therapeutic targets that constitute alternative ways of treating cancer by inhibiting their reprogrammed metabolism. Collectively, this study provides an extensive resource of predicted cancer metabolic profiles and dependencies. Combining gene expression with metabolic modules identifies molecular mechanisms of cancer undetected on an individual gene level and allows discovery of new potential therapeutic targets.

},
  keywords = {Cell Line, Tumor, Cluster Analysis, Disease Progression, Gene Expression Profiling, Gene Expression Regulation, Neoplastic, Gene Regulatory Networks, Humans, Kaplan-Meier Estimate, Metabolome, mutation, Neoplasms, Oncogenes, Phenotype, Prognosis, RNA, Small Interfering, Sequence Analysis, RNA, Transcriptome, Treatment Outcome},
  issn = {1538-7445},
  doi = {10.1158/0008-5472.CAN-17-2705},
  author = {Cubuk, Cankut and Hidalgo, Marta R and Amadoz, Alicia and Pujana, Miguel A and Mateo, Francesca and Herranz, Carmen and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin}
}

@article{410,
  title = {{LRH-1} agonism favours an immune-islet dialogue which protects against diabetes mellitus.},
  journal = {Nat Commun},
  volume = {9},
  year = {2018},
  month = {2018 04 16},
  pages = {1488},
  abstract = {

Type 1 diabetes mellitus (T1DM) is due to the selective destruction of islet beta cells by immune cells. Current therapies focused on repressing the immune attack or stimulating beta cell regeneration still have limited clinical efficacy. Therefore, it is timely to identify innovative targets to dampen the immune process, while promoting beta cell survival and function. Liver receptor homologue-1 (LRH-1) is a nuclear receptor that represses inflammation in digestive organs, and protects pancreatic islets against apoptosis. Here, we show that BL001, a small LRH-1 agonist, impedes hyperglycemia progression and the immune-dependent inflammation of pancreas in murine models of T1DM, and beta cell apoptosis in islets of type 2 diabetic patients, while increasing beta cell mass and insulin secretion. Thus, we suggest that LRH-1 agonism favors a dialogue between immune and islet cells, which could be druggable to protect against diabetes mellitus.

},
  keywords = {Animals, Apoptosis, Cell Communication, Cell Survival, Diabetes Mellitus, Experimental, Diabetes Mellitus, Type 2, Female, Gene Expression Regulation, Humans, Hypoglycemic Agents, Immunity, Innate, insulin, Insulin-Secreting Cells, Islets of Langerhans, Islets of Langerhans Transplantation, Macrophages, Male, Mice, Mice, Inbred C57BL, Phenalenes, Receptors, Cytoplasmic and Nuclear, Streptozocin, T-Lymphocytes, Regulatory, Transplantation, Heterologous},
  issn = {2041-1723},
  doi = {10.1038/s41467-018-03943-0},
  author = {Cobo-Vuilleumier, Nadia and Lorenzo, Petra I and Rodr{\'\i}guez, Noelia Garc{\'\i}a and Herrera G{\'o}mez, Irene de Gracia and Fuente-Martin, Esther and L{\'o}pez-Noriega, Livia and Mellado-Gil, Jos{\'e} Manuel and Romero-Zerbo, Silvana-Yanina and Baqui{\'e}, Mathurin and Lachaud, Christian Claude and Stifter, Katja and Perdomo, German and Bugliani, Marco and De Tata, Vincenzo and Bosco, Domenico and Parnaud, Geraldine and Pozo, David and Hmadcha, Abdelkrim and Florido, Javier P and Toscano, Miguel G and de Haan, Peter and Schoonjans, Kristina and S{\'a}nchez Palaz{\'o}n, Luis and Marchetti, Piero and Schirmbeck, Reinhold and Mart{\'\i}n-Montalvo, Alejandro and Meda, Paolo and Soria, Bernat and Berm{\'u}dez-Silva, Francisco-Javier and St-Onge, Luc and Gauthier, Benoit R}
}

@article{404,
  title = {Models of cell signaling uncover molecular mechanisms of high-risk neuroblastoma and predict disease outcome.},
  journal = {Biol Direct},
  volume = {13},
  year = {2018},
  month = {2018 08 22},
  pages = {16},
  abstract = {

BACKGROUND: Despite the progress in neuroblastoma therapies the mortality of high-risk patients is still high (40-50\%) and the molecular basis of the disease remains poorly known. Recently, a mathematical model was used to demonstrate that the network regulating stress signaling by the c-Jun N-terminal kinase pathway played a crucial role in survival of patients with neuroblastoma irrespective of their MYCN amplification status. This demonstrates the enormous potential of computational models of biological modules for the discovery of underlying molecular mechanisms of diseases.

RESULTS: Since signaling is known to be highly relevant in cancer, we have used a computational model of the whole cell signaling network to understand the molecular determinants of bad prognostic in neuroblastoma. Our model produced a comprehensive view of the molecular mechanisms of neuroblastoma tumorigenesis and progression.

CONCLUSION: We have also shown how the activity of signaling circuits can be considered a reliable model-based prognostic biomarker.

REVIEWERS: This article was reviewed by Tim Beissbarth, Wenzhong Xiao and Joanna Polanska. For the full reviews, please go to the Reviewers{\textquoteright} comments section.

},
  keywords = {Computational Biology, Gene Expression Regulation, Neoplastic, Humans, JNK Mitogen-Activated Protein Kinases, Models, Theoretical, Neuroblastoma, Signal Transduction},
  issn = {1745-6150},
  doi = {10.1186/s13062-018-0219-4},
  author = {Hidalgo, Marta R and Amadoz, Alicia and Cubuk, Cankut and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin}
}

@article{407,
  title = {The modular network structure of the mutational landscape of Acute Myeloid Leukemia.},
  journal = {PLoS One},
  volume = {13},
  year = {2018},
  month = {2018},
  pages = {e0202926},
  abstract = {

Acute myeloid leukemia (AML) is associated with the sequential accumulation of acquired genetic alterations. Although at diagnosis cytogenetic alterations are frequent in AML, roughly 50\% of patients present an apparently normal karyotype (NK), leading to a highly heterogeneous prognosis. Due to this significant heterogeneity, it has been suggested that different molecular mechanisms may trigger the disease with diverse prognostic implications. We performed whole-exome sequencing (WES) of tumor-normal matched samples of de novo AML-NK patients lacking mutations in NPM1, CEBPA or FLT3-ITD to identify new gene mutations with potential prognostic and therapeutic relevance to patients with AML. Novel candidate-genes, together with others previously described, were targeted resequenced in an independent cohort of 100 de novo AML patients classified in the cytogenetic intermediate-risk (IR) category. A mean of 4.89 mutations per sample were detected in 73 genes, 35 of which were mutated in more than one patient. After a network enrichment analysis, we defined a single in silico model and established a set of seed-genes that may trigger leukemogenesis in patients with normal karyotype. The high heterogeneity of gene mutations observed in AML patients suggested that a specific alteration could not be as essential as the interaction of deregulated pathways.

},
  keywords = {Adult, Aged, Cytodiagnosis, Female, Gene Regulatory Networks, Genetic Association Studies, Genetic Heterogeneity, Humans, Karyotype, Leukemia, Myeloid, Acute, Male, Middle Aged, mutation, Neoplasm Proteins, Nucleophosmin, Prognosis, whole exome sequencing},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0202926},
  author = {Ib{\'a}{\~n}ez, Mariam and Carbonell-Caballero, Jos{\'e} and Such, Esperanza and Garc{\'\i}a-Alonso, Luz and Liquori, Alessandro and L{\'o}pez-Pav{\'\i}a, Mar{\'\i}a and LLop, Marta and Alonso, Carmen and Barrag{\'a}n, Eva and G{\'o}mez-Segu{\'\i}, In{\'e}s and Neef, Alexander and Herv{\'a}s, David and Montesinos, Pau and Sanz, Guillermo and Sanz, Miguel Angel and Dopazo, Joaquin and Cervera, Jos{\'e}}
}

@article{387,
  title = {{HGVA}: the {Human Genome Variation Archive}.},
  journal = {Nucleic Acids Res},
  volume = {45},
  year = {2017},
  month = {2017 07 03},
  pages = {W189--W194},
  abstract = {

High-profile genomic variation projects like the 1000 Genomes project or the Exome Aggregation Consortium, are generating a wealth of human genomic variation knowledge which can be used as an essential reference for identifying disease-causing genotypes. However, accessing these data, contrasting the various studies and integrating those data in downstream analyses remains cumbersome. The Human Genome Variation Archive (HGVA) tackles these challenges and facilitates access to genomic data for key reference projects in a clean, fast and integrated fashion. HGVA provides an efficient and intuitive web-interface for easy data mining, a comprehensive RESTful API and client libraries in Python, Java and JavaScript for fast programmatic access to its knowledge base. HGVA calculates population frequencies for these projects and enriches their data with variant annotation provided by CellBase, a rich and fast annotation solution. HGVA serves as a proof-of-concept of the genome analysis developments being carried out by the University of Cambridge together with UK{\textquoteright}s 100 000 genomes project and the National Institute for Health Research BioResource Rare-Diseases, in particular, deploying open-source for Computational Biology (OpenCB) software platform for storing and analyzing massive genomic datasets.

},
  keywords = {Genetic Variation, Genome, Human, Humans, Internet, Software, User-Computer Interface},
  issn = {1362-4962},
  doi = {10.1093/nar/gkx445},
  url = {https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkx445},
  author = {Lopez, Javier and Coll, Jacobo and Haimel, Matthias and Kandasamy, Swaathi and T{\'a}rraga, Joaqu{\'\i}n and Furio-Tari, Pedro and Bari, Wasim and Bleda, Marta and Rueda, Antonio and Gr{\"a}f, Stefan and Rendon, Augusto and Dopazo, Joaquin and Medina, Ignacio}
}

@article{434,
  title = {High throughput estimation of functional cell activities reveals disease mechanisms and predicts relevant clinical outcomes.},
  journal = {Oncotarget},
  volume = {8},
  year = {2017},
  month = {2017 Jan 17},
  pages = {5160--5178},
  abstract = {

Understanding the aspects of the cell functionality that account for disease or drug action mechanisms is a main challenge for precision medicine. Here we propose a new method that models cell signaling using biological knowledge on signal transduction. The method recodes individual gene expression values (and/or gene mutations) into accurate measurements of changes in the activity of signaling circuits, which ultimately constitute high-throughput estimations of cell functionalities caused by gene activity within the pathway. Moreover, such estimations can be obtained either at cohort-level, in case/control comparisons, or personalized for individual patients. The accuracy of the method is demonstrated in an extensive analysis involving 5640 patients from 12 different cancer types. Circuit activity measurements not only have a high diagnostic value but also can be related to relevant disease outcomes such as survival, and can be used to assess therapeutic interventions.

},
  keywords = {Computational Biology, gene expression, Gene Regulatory Networks, Humans, mutation, Neoplasms, Precision Medicine, Sequence Analysis, RNA, Signal Transduction},
  issn = {1949-2553},
  doi = {10.18632/oncotarget.14107},
  author = {Hidalgo, Marta R and Cubuk, Cankut and Amadoz, Alicia and Salavert, Francisco and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaquin}
}

@article{383,
  title = {Integration of transcriptomic and metabolic data reveals hub transcription factors involved in drought stress response in sunflower ({Helianthus} annuus {L.}).},
  journal = {Plant Mol Biol},
  volume = {94},
  year = {2017},
  month = {2017 Jul},
  pages = {549--564},
  abstract = {

By integration of transcriptional and metabolic profiles we identified pathways and hubs transcription factors regulated during drought conditions in sunflower, useful for applications in molecular and/or biotechnological breeding. Drought is one of the most important environmental stresses that effects crop productivity in many agricultural regions. Sunflower is tolerant to drought conditions but the mechanisms involved in this tolerance remain unclear at the molecular level. The aim of this study was to characterize and integrate transcriptional and metabolic pathways related to drought stress in sunflower plants, by using a system biology approach. Our results showed a delay in plant senescence with an increase in the expression level of photosynthesis related genes as well as higher levels of sugars, osmoprotectant amino acids and ionic nutrients under drought conditions. In addition, we identified transcription factors that were upregulated during drought conditions and that may act as hubs in the transcriptional network. Many of these transcription factors belong to families implicated in the drought response in model species. The integration of transcriptomic and metabolomic data in this study, together with physiological measurements, has improved our understanding of the biological responses during droughts and contributes to elucidate the molecular mechanisms involved under this environmental condition. These findings will provide useful biotechnological tools to improve stress tolerance while maintaining crop yield under restricted water availability.

},
  keywords = {Chlorophyll, Gene Expression Regulation, Plant, Helianthus, Plant Leaves, Plant Proteins, Protein Array Analysis, RNA, Plant, Stress, Physiological, Transcription Factors, Water},
  issn = {1573-5028},
  doi = {10.1007/s11103-017-0625-5},
  author = {Moschen, Sebasti{\'a}n and Di Rienzo, Julio A and Higgins, Janet and Tohge, Takayuki and Watanabe, Mutsumi and Gonzalez, Sergio and Rivarola, M{\'a}ximo and Garcia-Garcia, Francisco and Dopazo, Joaquin and Hopp, H Esteban and Hoefgen, Rainer and Fernie, Alisdair R and Paniego, Norma and Fernandez, Paula and Heinz, Ruth A}
}

@article{388,
  title = {Reference genome assessment from a population scale perspective: an accurate profile of variability and noise.},
  journal = {Bioinformatics},
  volume = {33},
  year = {2017},
  month = {2017 Nov 15},
  pages = {3511--3517},
  abstract = {

Motivation: Current plant and animal genomic studies are often based on newly assembled genomes that have not been properly consolidated. In this scenario, misassembled regions can easily lead to false-positive findings. Despite quality control scores are included within genotyping protocols, they are usually employed to evaluate individual sample quality rather than reference sequence reliability. We propose a statistical model that combines quality control scores across samples in order to detect incongruent patterns at every genomic region. Our model is inherently robust since common artifact signals are expected to be shared between independent samples over misassembled regions of the genome.

Results: The reliability of our protocol has been extensively tested through different experiments and organisms with accurate results, improving state-of-the-art methods. Our analysis demonstrates synergistic relations between quality control scores and allelic variability estimators, that improve the detection of misassembled regions, and is able to find strong artifact signals even within the human reference assembly. Furthermore, we demonstrated how our model can be trained to properly rank the confidence of a set of candidate variants obtained from new independent samples.

Availability and implementation: This tool is freely available at http://gitlab.com/carbonell/ces.

Contact: jcarbonell.cipf@gmail.com or joaquin.dopazo@juntadeandalucia.es.

Supplementary information: Supplementary data are available at Bioinformatics online.

},
  keywords = {Animals, Genetic Variation, Genome, Genomics, Genotype, Humans, Models, Statistical, Quality Control, Reproducibility of Results, Software},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btx482},
  url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btx482},
  author = {Carbonell-Caballero, Jos{\'e} and Amadoz, Alicia and Alonso, Roberto and Hidalgo, Marta R and Cubuk, Cankut and Conesa, David and L{\'o}pez-Qu{\'\i}lez, Antonio and Dopazo, Joaquin}
}

@article{1231,
  title = {Whole exome sequencing coupled with unbiased functional analysis reveals new {Hirschsprung} disease genes.},
  journal = {Genome Biology},
  volume = {18},
  year = {2017},
  month = {2017 Mar 08},
  pages = {48},
  abstract = {BACKGROUND: Hirschsprung disease (HSCR), which is congenital obstruction of the bowel, results from a failure of enteric nervous system (ENS) progenitors to migrate, proliferate, differentiate, or survive within the distal intestine. Previous studies that have searched for genes underlying HSCR have focused on ENS-related pathways and genes not fitting the current knowledge have thus often been ignored. We identify and validate novel HSCR genes using whole exome sequencing (WES), burden tests, in silico prediction, unbiased in vivo analyses of the mutated genes in zebrafish, and expression analyses in zebrafish, mouse, and human. RESULTS: We performed de novo mutation (DNM) screening on 24 HSCR trios. We identify 28 DNMs in 21 different genes. Eight of the DNMs we identified occur in RET, the main HSCR gene, and the remaining 20 DNMs reside in genes not reported in the ENS. Knockdown of all 12 genes with missense or loss-of-function DNMs showed that the orthologs of four genes (DENND3, NCLN, NUP98, and TBATA) are indispensable for ENS development in zebrafish, and these results were confirmed by CRISPR knockout. These genes are also expressed in human and mouse gut and/or ENS progenitors.
Importantly, the encoded proteins are linked to neuronal processes shared by the central nervous system and the ENS. CONCLUSIONS: Our data open new fields of investigation into HSCR pathology and provide novel insights into the development of the ENS. Moreover, the study demonstrates that functional analyses of genes carrying DNMs are warranted to delineate the full genetic architecture of rare complex diseases.},
  keywords = {Hirschsprung, Rare Disease, WES},
  issn = {1474-760X},
  doi = {10.1186/s13059-017-1174-6},
  url = {http://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1174-6},
  author = {Gui, Hongsheng and Schriemer, Duco and Cheng, William W and Chauhan, Rajendra K and Anti{\v n}olo, Guillermo and Berrios, Courtney and Bleda, Marta and Brooks, Alice S and Brouwer, Rutger W W and Burns, Alan J and Cherny, Stacey S and Dopazo, Joaquin and Eggen, Bart J L and Griseri, Paola and Jalloh, Binta and Le, Thuy-Linh and Lui, Vincent C H and Luz{\'o}n-Toro, Berta and Matera, Ivana and Ngan, Elly S W and Pelet, Anna and Ruiz-Ferrer, Macarena and Sham, Pak C and Shepherd, Iain T and So, Man-Ting and Sribudiani, Yunia and Tang, Clara S M and van den Hout, Mirjam C G N and van der Linde, Herma C and van Ham, Tjakko J and van IJcken, Wilfred F J and Verheij, Joke B G M and Amiel, Jeanne and Borrego, Salud and Ceccherini, Isabella and Chakravarti, Aravinda and Lyonnet, Stanislas and Tam, Paul K H and Garcia-Barcel{\'o}, Maria-Merc{\`e} and Hofstra, Robert M W}
}

% NOTE(review): entry 431 carries the same DOI as entry 1231 (10.1186/s13059-017-1174-6);
% the two look like duplicate records of one paper -- confirm which key is cited before merging.
@article{431,
  title = {Whole exome sequencing coupled with unbiased functional analysis reveals new {Hirschsprung} disease genes},
  journal = {Genome Biology},
  volume = {18},
  year = {2017},
  month = {Jan-12-2017},
  doi = {10.1186/s13059-017-1174-6},
  url = {http://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1174-6},
  author = {Gui, Hongsheng and Schriemer, Duco and Cheng, William W. and Chauhan, Rajendra K. and Anti{\v n}olo, Guillermo and Berrios, Courtney and Bleda, Marta and Brooks, Alice S. and Brouwer, Rutger W. W. and Burns, Alan J. and Cherny, Stacey S. and Dopazo, Joaquin and Eggen, Bart J. L. and Griseri, Paola and Jalloh, Binta and Le, Thuy-Linh and Lui, Vincent C. H. and Luz{\'o}n-Toro, Berta and Matera, Ivana and Ngan, Elly S. W. and Pelet, Anna and Ruiz-Ferrer, Macarena and Sham, Pak C. and Shepherd, Iain T. and So, Man-Ting and Sribudiani, Yunia and Tang, Clara S. M. and van den Hout, Mirjam C. G. N. and van der Linde, Herma C. and van Ham, Tjakko J. and van IJcken, Wilfred F. J. and Verheij, Joke B. G. M. and Amiel, Jeanne and Borrego, Salud and Ceccherini, Isabella and Chakravarti, Aravinda and Lyonnet, Stanislas and Tam, Paul K. H. and Garcia-Barcel{\'o}, Maria-Merc{\`e} and Hofstra, Robert M. W.}
}

@article{1203,
  title = {Actionable pathways: interactive discovery of therapeutic targets using signaling pathway models.},
  journal = {Nucleic Acids Research},
  year = {2016},
  month = {2016 May 2},
  abstract = {The discovery of actionable targets is crucial for targeted therapies and is also a constituent part of the drug discovery process. The success of an intervention over a target depends critically on its contribution, within the complex network of gene interactions, to the cellular processes responsible for disease progression or therapeutic response. Here we present PathAct, a web server that predicts the effect that interventions over genes (inhibitions or activations that simulate knock-outs, drug treatments or over-expressions) can have over signal transmission within signaling pathways and, ultimately, over the cell functionalities triggered by them. PathAct implements an advanced graphical interface that provides a unique interactive working environment in which the suitability of potentially actionable genes, that could eventually become drug targets for personalized or individualized therapies, can be easily tested.
The PathAct tool can be found at: http://pathact.babelomics.org.}, keywords = {actionable genes, Disease mechanism, drug action mechanism, Drug discovery, pathway analysis, personalized medicine, signalling, therapeutic targets}, issn = {1362-4962}, doi = {10.1093/nar/gkw369}, url = {http://nar.oxfordjournals.org/content/early/2016/05/02/nar.gkw369.full}, author = {Salavert, Francisco and Hidalgo, Marta R and Amadoz, Alicia and Cubuk, Cankut and Medina, Ignacio and Crespo, Daniel and Carbonell-Caballero, Jos{\'e} and Dopazo, Joaqu{\'\i}n} } @article {1211, title = {Extension of human lncRNA transcripts by RACE coupled with long-read high-throughput sequencing (RACE-Seq).}, journal = {Nature communications}, volume = {7}, year = {2016}, month = {2016}, pages = {12339}, abstract = {Long non-coding RNAs (lncRNAs) constitute a large, yet mostly uncharacterized fraction of the mammalian transcriptome. Such characterization requires a comprehensive, high-quality annotation of their gene structure and boundaries, which is currently lacking. Here we describe RACE-Seq, an experimental workflow designed to address this based on RACE (rapid amplification of cDNA ends) and long-read RNA sequencing. We apply RACE-Seq to 398 human lncRNA genes in seven tissues, leading to the discovery of 2,556 on-target, novel transcripts. About 60\% of the targeted loci are extended in either 5{\textquoteright} or 3{\textquoteright}, often reaching genomic hallmarks of gene boundaries. Analysis of the novel transcripts suggests that lncRNAs are as long, have as many exons and undergo as much alternative splicing as protein-coding genes, contrary to current assumptions. 
Overall, we show that RACE-Seq is an effective tool to annotate an organism{\textquoteright}s deep transcriptome, and compares favourably to other targeted sequencing techniques.}, issn = {2041-1723}, doi = {10.1038/ncomms12339}, url = {http://www.nature.com/articles/ncomms12339}, author = {Lagarde, Julien and Uszczynska-Ratajczak, Barbara and Santoyo-L{\'o}pez, Javier and Gonzalez, Jose Manuel and Tapanari, Electra and Mudge, Jonathan M and Steward, Charles A and Wilming, Laurens and Tanzer, Andrea and Howald, C{\'e}dric and Chrast, Jacqueline and Vela-Boza, Alicia and Rueda, Antonio and L{\'o}pez-Domingo, Francisco J and Dopazo, Joaquin and Reymond, Alexandre and Guig{\'o}, Roderic and Harrow, Jennifer} } @article {559, title = {Extension of human lncRNA transcripts by RACE coupled with long-read high-throughput sequencing (RACE-Seq)}, journal = {Nature Communications}, volume = {7}, year = {2016}, month = {Jan-11-2016}, doi = {10.1038/ncomms12339}, url = {http://www.nature.com/articles/ncomms12339}, author = {Lagarde, Julien and Uszczynska-Ratajczak, Barbara and Santoyo-L{\'o}pez, Javier and Gonzalez, Jose Manuel and Tapanari, Electra and Mudge, Jonathan M. and Steward, Charles A. and Wilming, Laurens and Tanzer, Andrea and Howald, C{\'e}dric and Chrast, Jacqueline and Vela-Boza, Alicia and Rueda, Antonio and L{\'o}pez-Domingo, Francisco J. and Dopazo, Joaquin and Reymond, Alexandre and Guig{\'o}, Roderic and Harrow, Jennifer} } @article {441, title = {Highly sensitive and ultrafast read mapping for RNA-seq analysis.}, journal = {DNA Res}, volume = {23}, year = {2016}, month = {2016 Apr}, pages = {93--100}, abstract = {

As sequencing technologies progress, the amount of data produced grows exponentially, shifting the bottleneck of discovery towards the data analysis phase. In particular, currently available mapping solutions for RNA-seq leave room for improvement in terms of sensitivity and performance, hindering an efficient analysis of transcriptomes by massive sequencing. Here, we present an innovative approach that combines re-engineering, optimization and parallelization. This solution results in a significant increase of mapping sensitivity over a wide range of read lengths and substantial shorter runtimes when compared with current RNA-seq mapping methods available.

}, keywords = {Genomics, High-Throughput Nucleotide Sequencing, Humans, Sensitivity and Specificity, Sequence Analysis, RNA, Transcriptome}, issn = {1756-1663}, doi = {10.1093/dnares/dsv039}, author = {Medina, I and T{\'a}rraga, J and Mart{\'\i}nez, H and Barrachina, S and Castillo, M I and Paschall, J and Salavert-Torres, J and Blanquer-Espert, I and Hern{\'a}ndez-Garc{\'\i}a, V and Quintana-Ort{\'\i}, E S and Dopazo, J} } @article {561, title = {Human DNA methylomes of neurodegenerative diseases show common epigenomic patterns.}, journal = {Transl Psychiatry}, volume = {6}, year = {2016}, month = {2016 Jan 19}, pages = {e718}, abstract = {

Different neurodegenerative disorders often show similar lesions, such as the presence of amyloid plaques, TAU-neurotangles and synuclein inclusions. The genetically inherited forms are rare, so we wondered whether shared epigenetic aberrations, such as those affecting DNA methylation, might also exist. The studied samples were gray matter samples from the prefrontal cortex of control and neurodegenerative disease-associated cases. We performed the DNA methylation analyses of Alzheimer{\textquoteright}s disease, dementia with Lewy bodies, Parkinson{\textquoteright}s disease and Alzheimer-like neurodegenerative profile associated with Down{\textquoteright}s syndrome samples. The DNA methylation landscapes obtained show that neurodegenerative diseases share similar aberrant CpG methylation shifts targeting a defined gene set. Our findings suggest that neurodegenerative disorders might have similar pathogenetic mechanisms that subsequently evolve into different clinical entities. The identified aberrant DNA methylation changes can be used as biomarkers of the disorders and as potential new targets for the development of new therapies.

}, keywords = {Adult, Aged, Aged, 80 and over, DNA Methylation, Epigenomics, Female, Humans, Male, Middle Aged, neurodegenerative diseases, Prefrontal Cortex, Tissue Array Analysis}, issn = {2158-3188}, doi = {10.1038/tp.2015.214}, author = {Sanchez-Mut, J V and Heyn, H and Vidal, E and Moran, S and Sayols, S and Delgado-Morales, R and Schultz, M D and Ansoleaga, B and Garcia-Esparcia, P and Pons-Espinal, M and de Lagran, M M and Dopazo, J and Rabano, A and Avila, J and Dierssen, M and Lott, I and Ferrer, I and Ecker, J R and Esteller, M} } @article {446, title = {Integrating transcriptomic and metabolomic analysis to understand natural leaf senescence in sunflower.}, journal = {Plant Biotechnol J}, volume = {14}, year = {2016}, month = {2016 Feb}, pages = {719--34}, abstract = {

Leaf senescence is a complex process, which has dramatic consequences on crop yield. In sunflower, gap between potential and actual yields reveals the economic impact of senescence. Indeed, sunflower plants are incapable of maintaining their green leaf area over sustained periods. This study characterizes the leaf senescence process in sunflower through a systems biology approach integrating transcriptomic and metabolomic analyses: plants being grown under both glasshouse and field conditions. Our results revealed a correspondence between profile changes detected at the molecular, biochemical and physiological level throughout the progression of leaf senescence measured at different plant developmental stages. Early metabolic changes were detected prior to anthesis and before the onset of the first senescence symptoms, with more pronounced changes observed when physiological and molecular variables were assessed under field conditions. During leaf development, photosynthetic activity and cell growth processes decreased, whereas sucrose, fatty acid, nucleotide and amino acid metabolisms increased. Pathways related to nutrient recycling processes were also up-regulated. Members of the NAC, AP2-EREBP, HB, bZIP and MYB transcription factor families showed high expression levels, and their expression level was highly correlated, suggesting their involvement in sunflower senescence. The results of this study thus contribute to the elucidation of the molecular mechanisms involved in the onset and progression of leaf senescence in sunflower leaves as well as to the identification of candidate genes involved in this process.

}, keywords = {Gas Chromatography-Mass Spectrometry, Gene Expression Profiling, Gene Expression Regulation, Plant, Gene ontology, Genes, Plant, Helianthus, Ions, metabolomics, Oligonucleotide Array Sequence Analysis, Plant Leaves, Principal Component Analysis, RNA, Messenger, Transcription Factors}, issn = {1467-7652}, doi = {10.1111/pbi.12422}, author = {Moschen, Sebasti{\'a}n and Bengoa Luoni, Sof{\'\i}a and Di Rienzo, Julio A and Caro, Mar{\'\i}a Del Pilar and Tohge, Takayuki and Watanabe, Mutsumi and Hollmann, Julien and Gonzalez, Sergio and Rivarola, M{\'a}ximo and Garcia-Garcia, Francisco and Dopazo, Joaquin and Hopp, Horacio Esteban and Hoefgen, Rainer and Fernie, Alisdair R and Paniego, Norma and Fernandez, Paula and Heinz, Ruth A} } @article {454, title = {The transcriptomics of an experimentally evolved plant-virus interaction.}, journal = {Sci Rep}, volume = {6}, year = {2016}, month = {2016 04 26}, pages = {24901}, abstract = {

Models of plant-virus interaction assume that the ability of a virus to infect a host genotype depends on the matching between virulence and resistance genes. Recently, we evolved tobacco etch potyvirus (TEV) lineages on different ecotypes of Arabidopsis thaliana, and found that some ecotypes selected for specialist viruses whereas others selected for generalists. Here we sought to evaluate the transcriptomic basis of such relationships. We have characterized the transcriptomic responses of five ecotypes infected with the ancestral and evolved viruses. Genes and functional categories differentially expressed by plants infected with local TEV isolates were identified, showing heterogeneous responses among ecotypes, although significant parallelism existed among lineages evolved in the same ecotype. Although genes involved in immune responses were altered upon infection, other functional groups were also pervasively over-represented, suggesting that plant resistance genes were not the only drivers of viral adaptation. Finally, the transcriptomic consequences of infection with the generalist and specialist lineages were compared. Whilst the generalist induced very similar perturbations in the transcriptomes of the different ecotypes, the perturbations induced by the specialist were divergent. Plant defense mechanisms were activated when the infecting virus was specialist but they were down-regulated when infecting with generalist.

}, keywords = {Arabidopsis, Ecotype, Gene Expression Profiling, Host-Pathogen Interactions, Potyvirus}, issn = {2045-2322}, doi = {10.1038/srep24901}, author = {Hillung, Julia and Garcia-Garcia, Francisco and Dopazo, Joaquin and Cuevas, Jos{\'e} M and Elena, Santiago F} } @article {558, title = {Whole exome sequencing of Rett syndrome-like patients reveals the mutational diversity of the clinical phenotype.}, journal = {Hum Genet}, volume = {135}, year = {2016}, month = {2016 12}, pages = {1343--1354}, abstract = {

Classical Rett syndrome (RTT) is a neurodevelopmental disorder where most of cases carry MECP2 mutations. Atypical RTT variants involve mutations in CDKL5 and FOXG1. However, a subset of RTT patients remains that do not carry any mutation in the described genes. Whole exome sequencing was carried out in a cohort of 21 female probands with clinical features overlapping with those of RTT, but without mutations in the customarily studied genes. Candidates were functionally validated by assessing the appearance of a neurological phenotype in Caenorhabditis elegans upon disruption of the corresponding ortholog gene. We detected pathogenic variants that accounted for the RTT-like phenotype in 14 (66.6~\%) patients. Five patients were carriers of mutations in genes already known to be associated with other syndromic neurodevelopmental disorders. We determined that the other patients harbored mutations in genes that have not previously been linked to RTT or other neurodevelopmental syndromes, such as the ankyrin repeat containing protein ANKRD31 or the neuronal acetylcholine receptor subunit alpha-5 (CHRNA5). Furthermore, worm assays demonstrated that mutations in the studied candidate genes caused locomotion defects. Our findings indicate that mutations in a variety of genes contribute to the development of RTT-like phenotypes.

}, keywords = {Adolescent, Adult, Animals, Caenorhabditis elegans, Carrier Proteins, Cell Cycle Proteins, Child, Child, Preschool, DNA Mutational Analysis, Exome, Female, Forkhead Transcription Factors, Genetic Variation, High-Throughput Nucleotide Sequencing, Humans, Methyl-CpG-Binding Protein 2, mutation, Nerve Tissue Proteins, Protein Serine-Threonine Kinases, Receptors, Nicotinic, Rett Syndrome}, issn = {1432-1203}, doi = {10.1007/s00439-016-1721-3}, author = {Lucariello, Mario and Vidal, Enrique and Vidal, Silvia and Saez, Mauricio and Roa, Laura and Huertas, Dori and Pineda, Merc{\`e} and Dalf{\'o}, Esther and Dopazo, Joaquin and Jurado, Paola and Armstrong, Judith and Esteller, Manel} } @article {1128, title = {Assessing the impact of mutations found in next generation sequencing data over human signaling pathways.}, journal = {Nucleic acids research}, volume = {43}, number = {W1}, year = {2015}, month = {2015 Apr 16}, pages = {W270--W275}, abstract = {Modern sequencing technologies produce increasingly detailed data on genomic variation. However, conventional methods for relating either individual variants or mutated genes to phenotypes present known limitations given the complex, multigenic nature of many diseases or traits. Here we present PATHiVar, a web-based tool that integrates genomic variation data with gene expression tissue information. PATHiVar constitutes a new generation of genomic data analysis methods that allow studying variants found in next generation sequencing experiment in the context of signaling pathways. Simple Boolean models of pathways provide detailed descriptions of the impact of mutations in cell functionality so as, recurrences in functionality failures can easily be related to diseases, even if they are produced by mutations in different genes. 
Patterns of changes in signal transmission circuits, often unpredictable from individual genes mutated, correspond to patterns of affected functionalities that can be related to complex traits such as disease progression, drug response, etc. PATHiVar is available at: http://pathivar.babelomics.org.}, keywords = {NGS, pathways, signalling, Systems biology}, issn = {1362-4962}, doi = {10.1093/nar/gkv349}, url = {http://nar.oxfordjournals.org/content/43/W1/W270}, author = {Hernansaiz-Ballesteros, Rosa D and Salavert, Francisco and Sebasti{\'a}n-Leon, Patricia and Alem{\'a}n, Alejandro and Medina, Ignacio and Dopazo, Joaqu{\'\i}n} } @article {1129, title = {Babelomics 5.0: functional interpretation for new generations of genomic data.}, journal = {Nucleic acids research}, volume = {43}, number = {W1}, year = {2015}, month = {2015 Apr 20}, pages = {W117--W121}, abstract = {Babelomics has been running for more than one decade offering a user-friendly interface for the functional analysis of gene expression and genomic data. Here we present its fifth release, which includes support for Next Generation Sequencing data including gene expression (RNA-seq), exome or genome resequencing. Babelomics has simplified its interface, being now more intuitive. Improved visualization options, such as a genome viewer as well as an interactive network viewer, have been implemented. New technical enhancements at both, client and server sides, makes the user experience faster and more dynamic. Babelomics offers user-friendly access to a full range of methods that cover: (i) primary data analysis, (ii) a variety of tests for different experimental designs and (iii) different enrichment and network analysis algorithms for the interpretation of the results of such tests in the proper functional context. In addition to the public server, local copies of Babelomics can be downloaded and installed. 
Babelomics is freely available at: http://www.babelomics.org.}, keywords = {babelomics, data integration, gene set analysis, interactome, network analysis, NGS, RNA-seq, Systems biology, transcriptomics}, issn = {1362-4962}, doi = {10.1093/nar/gkv384}, url = {http://nar.oxfordjournals.org/content/43/W1/W117}, author = {Alonso, Roberto and Salavert, Francisco and Garcia-Garcia, Francisco and Carbonell-Caballero, Jos{\'e} and Bleda, Marta and Garc{\'\i}a-Alonso, Luz and Sanchis-Juan, Alba and Perez-Gil, Daniel and Marin-Garcia, Pablo and S{\'a}nchez, Rub{\'e}n and Cubuk, Cankut and Hidalgo, Marta R and Amadoz, Alicia and Hernansaiz-Ballesteros, Rosa D and Alem{\'a}n, Alejandro and T{\'a}rraga, Joaqu{\'\i}n and Montaner, David and Medina, Ignacio and Dopazo, Joaquin} } @article {1132, title = {Combining tumor genome simulation with crowdsourcing to benchmark somatic single-nucleotide-variant detection.}, journal = {Nature methods}, year = {2015}, month = {2015 May 18}, abstract = {The detection of somatic mutations from cancer genome sequences is key to understanding the genetic basis of disease progression, patient survival and response to therapy. Benchmarking is needed for tool assessment and improvement but is complicated by a lack of gold standards, by extensive resource requirements and by difficulties in sharing personal genomic information. To resolve these issues, we launched the ICGC-TCGA DREAM Somatic Mutation Calling Challenge, a crowdsourced benchmark of somatic mutation detection algorithms. Here we report the BAMSurgeon tool for simulating cancer genomes and the results of 248 analyses of three in silico tumors created with it. Different algorithms exhibit characteristic error profiles, and, intriguingly, false positives show a trinucleotide profile very similar to one found in human tumors. 
Although the three simulated tumors differ in sequence contamination (deviation from normal cell sequence) and in subclonality, an ensemble of pipelines outperforms the best individual pipeline in all cases. BAMSurgeon is available at https://github.com/adamewing/bamsurgeon/.}, keywords = {cancer, NGS, variant calling}, issn = {1548-7105}, doi = {10.1038/nmeth.3407}, url = {http://www.nature.com/nmeth/journal/vaop/ncurrent/full/nmeth.3407.html}, author = {Ewing, Adam D and Houlahan, Kathleen E and Hu, Yin and Ellrott, Kyle and Caloian, Cristian and Yamaguchi, Takafumi N and Bare, J Christopher and P{\textquoteright}ng, Christine and Waggott, Daryl and Sabelnykova, Veronica Y and Kellen, Michael R and Norman, Thea C and Haussler, David and Friend, Stephen H and Stolovitzky, Gustavo and Margolin, Adam A and Stuart, Joshua M and Boutros, Paul C}, editor = {{ICGC-TCGA DREAM Somatic Mutation Calling Challenge participants} and Liu Xi and Ninad Dewal and Yu Fan and Wenyi Wang and David Wheeler and Andreas Wilm and Grace Hui Ting and Chenhao Li and Denis Bertrand and Niranjan Nagarajan and Qing-Rong Chen and Chih-Hao Hsu and Ying Hu and Chunhua Yan and Warren Kibbe and Daoud Meerzaman and Kristian Cibulskis and Mara Rosenberg and Louis Bergelson and Adam Kiezun and Amie Radenbaugh and Anne-Sophie Sertier and Anthony Ferrari and Laurie Tonton and Kunal Bhutani and Nancy F Hansen and Difei Wang and Lei Song and Zhongwu Lai and Liao, Yang and Shi, Wei and Carbonell-Caballero, Jos{\'e} and Joaqu{\'\i}n Dopazo and Cheryl C K Lau and Justin Guinney} } @article {460, title = {A Pan-Cancer Catalogue of Cancer Driver Protein Interaction Interfaces.}, journal = {PLoS Comput Biol}, volume = {11}, year = {2015}, month = {2015 Oct}, pages = {e1004518}, abstract = {

Despite their importance in maintaining the integrity of all cellular pathways, the role of mutations on protein-protein interaction (PPI) interfaces as cancer drivers has not been systematically studied. Here we analyzed the mutation patterns of the PPI interfaces from 10,028 proteins in a pan-cancer cohort of 5,989 tumors from 23 projects of The Cancer Genome Atlas (TCGA) to find interfaces enriched in somatic missense mutations. To that end we use e-Driver, an algorithm to analyze the mutation distribution of specific protein functional regions. We identified 103 PPI interfaces enriched in somatic cancer mutations. 32 of these interfaces are found in proteins coded by known cancer driver genes. The remaining 71 interfaces are found in proteins that have not been previously identified as cancer drivers even that, in most cases, there is an extensive literature suggesting they play an important role in cancer. Finally, we integrate these findings with clinical information to show how tumors apparently driven by the same gene have different behaviors, including patient outcomes, depending on which specific interfaces are mutated.

}, keywords = {Animals, Base Sequence, Biomarkers, Tumor, Catalogs as Topic, Chromosome Mapping, Computer Simulation, DNA Mutational Analysis, Genetic Predisposition to Disease, Humans, Models, Genetic, Molecular Sequence Data, mutation, Neoplasm Proteins, Neoplasms, Polymorphism, Single Nucleotide, Protein Interaction Mapping, Signal Transduction}, issn = {1553-7358}, doi = {10.1371/journal.pcbi.1004518}, author = {Porta-Pardo, Eduard and Garc{\'\i}a-Alonso, Luz and Hrabe, Thomas and Dopazo, Joaquin and Godzik, Adam} } @article {1155, title = {Prediction of human population responses to toxic compounds by a collaborative competition.}, journal = {Nature biotechnology}, year = {2015}, month = {2015 Aug 10}, abstract = {The ability to computationally predict the effects of toxic compounds on humans could help address the deficiencies of current chemical safety testing. Here, we report the results from a community-based DREAM challenge to predict toxicities of environmental compounds with potential adverse health effects for human populations. We measured the cytotoxicity of 156 compounds in 884 lymphoblastoid cell lines for which genotype and transcriptional data are available as part of the Tox21 1000 Genomes Project. The challenge participants developed algorithms to predict interindividual variability of toxic response from genomic profiles and population-level cytotoxicity data from structural attributes of the compounds. 179 submitted predictions were evaluated against an experimental data set to which participants were blinded. Individual cytotoxicity predictions were better than random, with modest correlations (Pearson{\textquoteright}s r < 0.28), consistent with complex trait genomic prediction. In contrast, predictions of population-level response to different compounds were higher (r < 0.66). 
The results highlight the possibility of predicting health risks associated with unknown compounds, although risk estimation accuracy remains suboptimal.}, issn = {1546-1696}, doi = {10.1038/nbt.3299}, url = {http://www.nature.com/nbt/journal/vaop/ncurrent/full/nbt.3299.html}, author = {Eduati, Federica and Mangravite, Lara M and Wang, Tao and Tang, Hao and Bare, J Christopher and Huang, Ruili and Norman, Thea and Kellen, Mike and Menden, Michael P and Yang, Jichen and Zhan, Xiaowei and Zhong, Rui and Xiao, Guanghua and Xia, Menghang and Abdo, Nour and Kosyk, Oksana} } @article {468, title = {PTMcode v2: a resource for functional associations of post-translational modifications within and between proteins.}, journal = {Nucleic Acids Res}, volume = {43}, year = {2015}, month = {2015 Jan}, pages = {D494--502}, abstract = {

The post-translational regulation of proteins is mainly driven by two molecular events, their modification by several types of moieties and their interaction with other proteins. These two processes are interdependent and together are responsible for the function of the protein in a particular cell state. Several databases focus on the prediction and compilation of protein-protein interactions (PPIs) and no less on the collection and analysis of protein post-translational modifications (PTMs), however, there are no resources that concentrate on describing the regulatory role of PTMs in PPIs. We developed several methods based on residue co-evolution and proximity to predict the functional associations of pairs of PTMs that we apply to modifications in the same protein and between two interacting proteins. In order to make data available for understudied organisms, PTMcode v2 (http://ptmcode.embl.de) includes a new strategy to propagate PTMs from validated modified sites through orthologous proteins. The second release of PTMcode covers 19 eukaryotic species from which we collected more than 300,000 experimentally verified PTMs (>1,300,000 propagated) of 69 types extracting the post-translational regulation of >100,000 proteins and >100,000 interactions. In total, we report 8 million associations of PTMs regulating single proteins and over 9.4 million interplays tuning PPIs.

}, keywords = {Databases, Protein, Internet, Protein Interaction Mapping, Protein Processing, Post-Translational}, issn = {1362-4962}, doi = {10.1093/nar/gku1081}, author = {Minguez, Pablo and Letunic, Ivica and Parca, Luca and Garc{\'\i}a-Alonso, Luz and Dopazo, Joaquin and Huerta-Cepas, Jaime and Bork, Peer} } @article {493, title = {The Activation of the Sox2 RR2 Pluripotency Transcriptional Reporter in Human Breast Cancer Cell Lines is Dynamic and Labels Cells with Higher Tumorigenic Potential.}, journal = {Front Oncol}, volume = {4}, year = {2014}, month = {2014}, pages = {308}, abstract = {

The striking similarity displayed at the mechanistic level between tumorigenesis and the generation of induced pluripotent stem cells and the fact that genes and pathways relevant for embryonic development are reactivated during tumor progression highlights the link between pluripotency and cancer. Based on these observations, we tested whether it is possible to use a pluripotency-associated transcriptional reporter, whose activation is driven by the SRR2 enhancer from the Sox2 gene promoter (named S4+ reporter), to isolate cancer stem cells (CSCs) from breast cancer cell lines. The S4+ pluripotency transcriptional reporter allows the isolation of cells with enhanced tumorigenic potential and its activation was switched on and off in the cell lines studied, reflecting a plastic cellular process. Microarray analysis comparing the populations in which the reporter construct is active versus inactive showed that positive cells expressed higher mRNA levels of cytokines (IL-8, IL-6, TNF) and genes (such as ATF3, SNAI2, and KLF6) previously related with the CSC phenotype in breast cancer.

}, issn = {2234-943X}, doi = {10.3389/fonc.2014.00308}, author = {Iglesias, Juan Manuel and Leis, Olatz and P{\'e}rez Ruiz, Est{\'\i}baliz and Gumuzio Barrie, Juan and Garcia-Garcia, Francisco and Aduriz, Ariane and Beloqui, Izaskun and Hernandez-Garcia, Susana and Lopez-Mato, Maria Paz and Dopazo, Joaquin and Pandiella, Atanasio and Menendez, Javier A and Martin, Angel Garcia} } @article {1087, title = {Assessing technical performance in differential gene expression experiments with external spike-in RNA control ratio mixtures.}, journal = {Nature communications}, volume = {5}, year = {2014}, month = {2014}, pages = {5125}, abstract = {There is a critical need for standard approaches to assess, report and compare the technical performance of genome-scale differential gene expression experiments. Here we assess technical performance with a proposed standard {\textquoteright}dashboard{\textquoteright} of metrics derived from analysis of external spike-in RNA control ratio mixtures. These control ratio mixtures with defined abundance ratios enable assessment of diagnostic performance of differentially expressed transcript lists, limit of detection of ratio (LODR) estimates and expression ratio variability and measurement bias. The performance metrics suite is applicable to analysis of a typical experiment, and here we also apply these metrics to evaluate technical performance among laboratories. An interlaboratory study using identical samples shared among 12 laboratories with three different measurement processes demonstrates generally consistent diagnostic power across 11 laboratories. Ratio measurement variability and bias are also comparable among laboratories for the same measurement process. 
We observe different biases for measurement processes using different mRNA-enrichment protocols.}, keywords = {RNA-seq}, issn = {2041-1723}, doi = {10.1038/ncomms6125}, url = {http://www.nature.com/ncomms/2014/140925/ncomms6125/full/ncomms6125.html}, author = {Munro, Sarah A and Lund, Steven P and Pine, P Scott and Binder, Hans and Clevert, Djork-Arn{\'e} and Conesa, Ana and Dopazo, Joaquin and Fasold, Mario and Hochreiter, Sepp and Hong, Huixiao and Jafari, Nadereh and Kreil, David P and Labaj, Pawe{\l} P and Li, Sheng and Liao, Yang and Lin, Simon M and Meehan, Joseph and Mason, Christopher E and Santoyo-L{\'o}pez, Javier and Setterquist, Robert A and Shi, Leming and Shi, Wei and Smyth, Gordon K and Stralis-Pavese, Nancy and Su, Zhenqiang and Tong, Weida and Wang, Charles and Wang, Jian and Xu, Joshua and Ye, Zhan and Yang, Yong and Yu, Ying and Salit, Marc} } @article {1074, title = {A Comprehensive DNA Methylation Profile of Epithelial-to-Mesenchymal Transition.}, journal = {Cancer research}, volume = {74}, number = {19}, year = {2014}, month = {2014 Aug 8}, pages = {5608--19}, abstract = {Epithelial-to-mesenchymal transition (EMT) is a plastic process in which fully differentiated epithelial cells are converted into poorly differentiated, migratory and invasive mesenchymal cells and it has been related to the metastasis potential of tumors. This is a reversible process and cells can also eventually undergo mesenchymal-to-epithelial transition (MET). The existence of a dynamic EMT process suggests the involvement of epigenetic shifts in the phenotype. Herein, we obtained the DNA methylomes at single-base resolution of MDCK cells undergoing epithelial-to-mesenchymal transition (EMT) and translated the identified differentially methylated regions (DMRs) to human breast cancer cells undergoing a gain of migratory and invasive capabilities associated with the EMT phenotype. 
We noticed dynamic and reversible changes of DNA methylation, both on promoter sequences and gene-bodies in association with transcription regulation of EMT-related genes. Most importantly, the identified DNA methylation markers of EMT were present in primary mammary tumors in association with the epithelial or the mesenchymal phenotype of the studied breast cancer samples.}, keywords = {Methyl-Seq, Methylomics, Next Generation Sequencing}, issn = {1538-7445}, doi = {10.1158/0008-5472.CAN-13-3659}, url = {http://www.ncbi.nlm.nih.gov/pubmed/25106427}, author = {Carmona, F Javier and Davalos, Veronica and Vidal, Enrique and Gomez, Antonio and Heyn, Holger and Hashimoto, Yutaka and Vizoso, Miguel and Martinez-Cardus, Anna and Sayols, Sergi and Ferreira, Humberto and Sanchez-Mut, Jose and Moran, Sebastian and Margeli, Mireia and Castella, Eva and Berdasco, Maria and Stefansson, Olafur Andri and Eyfjord, Jorunn E and Gonzalez-Suarez, Eva and Dopazo, Joaquin and Orozco, Modesto and Gut, Ivo and Esteller, Manel} } @article {1083, title = {A New Overgrowth Syndrome is Due to Mutations in RNF125.}, journal = {Human mutation}, volume = {35}, year = {2014}, month = {2014 Sep 5}, pages = {1436{\textendash}1441}, abstract = {Overgrowth syndromes (OGS) are a group of disorders in which all parameters of growth and physical development are above the mean for age and sex. We evaluated a series of 270 families from the Spanish Overgrowth Syndrome Registry with no known overgrowth syndrome. We identified one de novo deletion and three missense mutations in RNF125 in six patients from 4 families with overgrowth, macrocephaly, intellectual disability, mild hydrocephaly, hypoglycaemia and inflammatory diseases resembling Sj{\"o}gren syndrome. RNF125 encodes an E3 ubiquitin ligase and is a novel gene of OGS. Our studies of the RNF125 pathway point to upregulation of RIG-I-IPS1-MDA5 and/or disruption of the PI3K-AKT and interferon signaling pathways as the putative final effectors. 
This article is protected by copyright. All rights reserved.}, keywords = {NGS, prioritization, Rare Disease}, issn = {1098-1004}, doi = {10.1002/humu.22689}, url = {http://onlinelibrary.wiley.com/doi/10.1002/humu.22689/abstract}, author = {Tenorio, Jair and Mansilla, Alicia and Valencia, Mar{\'\i}a and Mart{\'\i}nez-Glez, V{\'\i}ctor and Romanelli, Valeria and Arias, Pedro and Castrej{\'o}n, Nerea and Poletta, Fernando and Guill{\'e}n-Navarro, Encarna and Gordo, Gema and Mansilla, Elena and Garc{\'\i}a-Santiago, F{\'e} and Gonz{\'a}lez-Casado, Isabel and Vallesp{\'\i}n, Elena and Palomares, Mar{\'\i}a and Mori, Mar{\'\i}a A and Santos-Simarro, Fernando and Garc{\'\i}a-Mi{\~n}aur, Sixto and Fern{\'a}ndez, Luis and Mena, Roc{\'\i}o and Benito-Sanz, Sara and Del Pozo, Angela and Silla, Juan Carlos and Iba{\~n}ez, Kristina and L{\'o}pez-Granados, Eduardo and Mart{\'\i}n-Trujillo, Alex and Montaner, David and Heath, Karen E and Campos-Barros, Angel and Dopazo, Joaqu{\'\i}n and Nevado, Juli{\'a}n and Monk, David and Ruiz-P{\'e}rez, V{\'\i}ctor L and Lapunzina, Pablo} } @article {507, title = {Novel genes detected by transcriptional profiling from whole-blood cells in patients with early onset of acute coronary syndrome.}, journal = {Clin Chim Acta}, volume = {421}, year = {2013}, month = {2013 Jun 05}, pages = {184-90}, abstract = {

BACKGROUND: Genome-wide expression analysis using microarrays has been used as a research strategy to discovery new biomarkers and candidate genes for a number of diseases. We aim to find new biomarkers for the prediction of acute coronary syndrome (ACS) with a differentially expressed mRNA profiling approach using whole genomic expression analysis in a peripheral blood cell model from patients with early ACS.

METHODS AND RESULTS: This study was carried out in two phases. On phase 1 a restricted clinical criteria (ACS-Ph1, n=9 and CG-Ph1, n=6) was used in order to select potential mRNA biomarkers candidates. A subsequent phase 2 study was performed using selected phase 1 markers analyzed by RT-qPCR using a larger and independent casuistic (ACS-Ph2, n=74 and CG-Ph2, n=41). A total of 549 genes were found to be differentially expressed in the first 48 h after the ACS-Ph1. Technical and biological validation further confirmed that ALOX15, AREG, BCL2A1, BCL2L1, CA1, COX7B, ECHDC3, IL18R1, IRS2, KCNE1, MMP9, MYL4 and TREML4, are differentially expressed in both phases of this study.

CONCLUSIONS: Transcriptomic analysis by microarray technology demonstrated differential expression during a 48 h time course suggesting a potential use of some of these genes as biomarkers for very early stages of ACS, as well as for monitoring early cardiac ischemic recovery.

}, keywords = {Acute Coronary Syndrome, Acute-Phase Proteins, Adult, biomarkers, Blood Cells, Early Diagnosis, gene expression, Gene Expression Profiling, Humans, Male, Middle Aged, Oligonucleotide Array Sequence Analysis, RNA, Messenger, Transcriptome}, issn = {1873-3492}, doi = {10.1016/j.cca.2013.03.011}, author = {Silbiger, Vivian N and Luchessi, Andr{\'e} D and Hirata, Ros{\'a}rio D C and Lima-Neto, L{\'\i}dio G and Cavichioli, D{\'e}bora and Carracedo, {\'A}ngel and Bri{\'o}n, Maria and Dopazo, Joaquin and Garcia-Garcia, Francisco and Dos Santos, Elizabete S and Ramos, Rui F and Sampaio, Marcelo F and Armaganijan, Dikran and Sousa, Amanda G M R and Hirata, Mario H} }
@comment{NOTE(review): entries 507 and 953 carry the same DOI (10.1016/j.cca.2013.03.011); 953 has no volume or pages. Both keys are kept so existing citations still resolve -- consider merging.}
@article {953, title = {Novel genes detected by transcriptional profiling from whole-blood cells in patients with early onset of acute coronary syndrome: Transcriptional profiling of acute coronary syndrome.}, journal = {Clinica chimica acta; international journal of clinical chemistry}, year = {2013}, month = {2013 Mar 24}, abstract = {BACKGROUND: Genome-wide expression analysis using microarrays has been used as a research strategy to discovery new biomarkers and candidate genes for a number of diseases. We aim to find new biomarkers for the prediction of acute coronary syndrome (ACS) with a differentially expressed mRNA profiling approach using whole genomic expression analysis in a peripheral blood cell model from patients with early ACS. METHODS AND RESULTS: This study was carried out in two phases. On phase 1 a restricted clinical criteria (ACS-Ph1, n=9 and CG-Ph1, n=6) was used in order to select potential mRNA biomarkers candidates. A subsequent phase 2 study was performed using selected phase 1 markers analyzed by RT-qPCR using a larger and independent casuistic (ACS-Ph2, n=74 and CG-Ph2, n=41). A total of 549 genes were found to be differentially expressed in the first 48 h after the ACS-Ph1. Technical and biological validation further confirmed that ALOX15, AREG, BCL2A1, BCL2L1, CA1, COX7B, ECHDC3, IL18R1, IRS2, KCNE1, MMP9, MYL4 and TREML4, are differentially expressed in both phases of this study. CONCLUSIONS: Transcriptomic analysis by microarray technology demonstrated differential expression during a 48 h time course suggesting a potential use of some of these genes as biomarkers for very early stages of ACS, as well as for monitoring early cardiac ischemic recovery.}, issn = {1873-3492}, doi = {10.1016/j.cca.2013.03.011}, author = {Silbiger, Vivian N and Luchessi, Andr{\'e} D and Hirata, Ros{\'a}rio D C and Lima-Neto, L{\'\i}dio G and Cavichioli, D{\'e}bora and Carracedo, {\'A}ngel and Bri{\'o}n, Maria and Dopazo, Joaqu{\'\i}n and Garcia-Garcia, Francisco and Dos Santos, Elizabete S and Ramos, Rui F and Sampaio, Marcelo F and Armaganijan, Dikran and Sousa, Amanda G M R and Hirata, Mario H} }
@article {1033, title = {Pathways systematically associated to Hirschsprung{\textquoteright}s disease.}, journal = {Orphanet journal of rare diseases}, volume = {8}, year = {2013}, month = {2013 Dec 2}, pages = {187}, abstract = {Despite it has been reported that several loci are involved in Hirschsprung{\textquoteright}s disease, the molecular basis of the disease remains yet essentially unknown. The study of collective properties of modules of functionally-related genes provides an efficient and sensitive statistical framework that can overcome sample size limitations in the study of rare diseases. Here, we present the extension of a previous study of a Spanish series of HSCR trios to an international cohort of 162 HSCR trios to validate the generality of the underlying functional basis of the Hirschsprung{\textquoteright}s disease mechanisms previously found. The Pathway-Based Analysis (PBA) confirms a strong association of gene ontology (GO) modules related to signal transduction and its regulation, enteric nervous system (ENS) formation and other processes related to the disease. In addition, network analysis recovers sub-networks significantly associated to the disease, which contain genes related to the same functionalities, thus providing an independent validation of these findings. The functional profiles of association obtained for patients populations from different countries were compared to each other. 
While gene associations were different at each series, the main functional associations were identical in all the five populations. These observations would also explain the reported low reproducibility of associations of individual disease genes across populations.}, keywords = {GWAS, Hirschsprung, network analysis, Pathway Based Analysis}, issn = {1750-1172}, doi = {10.1186/1750-1172-8-187}, url = {http://www.ojrd.com/content/8/1/187/abstract}, author = {Fern{\'a}ndez, Raquel M and Bleda, Marta and Luz{\'o}n-Toro, Berta and Garc{\'\i}a-Alonso, Luz and Arnold, Stacey and Sribudiani, Yunia and Besmond, Claude and Lantieri, Francesca and Doan, Betty and Ceccherini, Isabella and Lyonnet, Stanislas and Hofstra, Robert M W and Chakravarti, Aravinda and Anti{\~n}olo, Guillermo and Dopazo, Joaqu{\'\i}n and Borrego, Salud} }
@comment{NOTE(review): entries 1033 and 495 carry the same DOI (10.1186/1750-1172-8-187); both keys are kept so existing citations still resolve -- consider merging.}
@article {495, title = {Pathways systematically associated to Hirschsprung{\textquoteright}s disease.}, journal = {Orphanet J Rare Dis}, volume = {8}, year = {2013}, month = {2013 Dec 02}, pages = {187}, abstract = {

Despite it has been reported that several loci are involved in Hirschsprung{\textquoteright}s disease, the molecular basis of the disease remains yet essentially unknown. The study of collective properties of modules of functionally-related genes provides an efficient and sensitive statistical framework that can overcome sample size limitations in the study of rare diseases. Here, we present the extension of a previous study of a Spanish series of HSCR trios to an international cohort of 162 HSCR trios to validate the generality of the underlying functional basis of the Hirschsprung{\textquoteright}s disease mechanisms previously found. The Pathway-Based Analysis (PBA) confirms a strong association of gene ontology (GO) modules related to signal transduction and its regulation, enteric nervous system (ENS) formation and other processes related to the disease. In addition, network analysis recovers sub-networks significantly associated to the disease, which contain genes related to the same functionalities, thus providing an independent validation of these findings. The functional profiles of association obtained for patients populations from different countries were compared to each other. While gene associations were different at each series, the main functional associations were identical in all the five populations. These observations would also explain the reported low reproducibility of associations of individual disease genes across populations.

}, keywords = {Female, Genetic Predisposition to Disease, Genotype, Hirschsprung Disease, Humans, Male, Polymorphism, Single Nucleotide}, issn = {1750-1172}, doi = {10.1186/1750-1172-8-187}, author = {Fern{\'a}ndez, Raquel M and Bleda, Marta and Luz{\'o}n-Toro, Berta and Garc{\'\i}a-Alonso, Luz and Arnold, Stacey and Sribudiani, Yunia and Besmond, Claude and Lantieri, Francesca and Doan, Betty and Ceccherini, Isabella and Lyonnet, Stanislas and Hofstra, Robert M W and Chakravarti, Aravinda and Anti{\~n}olo, Guillermo and Dopazo, Joaquin and Borrego, Salud} } @article {931, title = {Development, Characterization and Experimental Validation of a Cultivated Sunflower (Helianthus annuus L.) Gene Expression Oligonucleotide Microarray.}, journal = {PloS one}, volume = {7}, year = {2012}, month = {2012}, pages = {e45899}, abstract = {Oligonucleotide-based microarrays with accurate gene coverage represent a key strategy for transcriptional studies in orphan species such as sunflower, H. annuus L., which lacks full genome sequences. The goal of this study was the development and functional annotation of a comprehensive sunflower unigene collection and the design and validation of a custom sunflower oligonucleotide-based microarray. A large scale EST (>130,000 ESTs) curation, assembly and sequence annotation was performed using Blast2GO (www.blast2go.de). The EST assembly comprises 41,013 putative transcripts (12,924 contigs and 28,089 singletons). The resulting Sunflower Unigen Resource (SUR version 1.0) was used to design an oligonucleotide-based Agilent microarray for cultivated sunflower. This microarray includes a total of 42,326 features: 1,417 Agilent controls, 74 control probes for sunflower replicated 10 times (740 controls) and 40,169 different non-control probes. Microarray performance was validated using a model experiment examining the induction of senescence by water deficit. 
Pre-processing and differential expression analysis of Agilent microarrays was performed using the Bioconductor limma package. The analyses based on p-values calculated by eBayes (p<0.01) allowed the detection of 558 differentially expressed genes between water stress and control conditions; from these, ten genes were further validated by qPCR. Over-represented ontologies were identified using FatiScan in the Babelomics suite. This work generated a curated and trustable sunflower unigene collection, and a custom, validated sunflower oligonucleotide-based microarray using Agilent technology. Both the curated unigene collection and the validated oligonucleotide microarray provide key resources for sunflower genome analysis, transcriptional studies, and molecular breeding for crop improvement.}, issn = {1932-6203}, doi = {10.1371/journal.pone.0045899}, url = {http://www.plosone.org/article/info\%3Adoi\%2F10.1371\%2Fjournal.pone.0045899}, author = {Fernandez, Paula and Soria, Marcelo and Blesa, David and Dirienzo, Julio and Moschen, Sebasti{\'a}n and Rivarola, M{\'a}ximo and Clavijo, Bernardo Jose and Gonzalez, Sergio and Peluffo, Lucila and Pr{\'\i}ncipi, Dario and Dosio, Guillermo and Aguirrezabal, Luis and Garcia-Garcia, Francisco and Conesa, Ana and Hopp, Esteban and Dopazo, Joaqu{\'\i}n and Heinz, Ruth Amelia and Paniego, Norma} } @article {902, title = {Evolutionary Genomics of Genes Involved in Olfactory Behavior in the Drosophila melanogaster Species Group.}, journal = {Evolutionary bioinformatics online}, volume = {8}, year = {2012}, month = {2012}, pages = {89-104}, abstract = {Previous comparative genomic studies of genes involved in olfactory behavior in Drosophila focused only on particular gene families such as odorant receptor and/or odorant binding proteins. However, olfactory behavior has a complex genetic architecture that is orchestrated by many interacting genes. 
In this paper, we present a comparative genomic study of olfactory behavior in Drosophila including an extended set of genes known to affect olfactory behavior. We took advantage of the recent burst of whole genome sequences and the development of powerful statistical tools to analyze genomic data and test evolutionary and functional hypotheses of olfactory genes in the six species of the Drosophila melanogaster species group for which whole genome sequences are available. Our study reveals widespread purifying selection and limited incidence of positive selection on olfactory genes. We show that the pace of evolution of olfactory genes is mostly independent of the life cycle stage, and of the number of life cycle stages, in which they participate in olfaction. However, we detected a relationship between evolutionary rates and the position that the gene products occupy in the olfactory system, genes occupying central positions tend to be more constrained than peripheral genes. Finally, we demonstrate that specialization to one host does not seem to be associated with bursts of adaptive evolution in olfactory genes in D. sechellia and D. erecta, the two specialists species analyzed, but rather different lineages have idiosyncratic evolutionary histories in which both historical and ecological factors have been involved.}, issn = {1176-9343}, doi = {10.4137/EBO.S8484}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3273929/?tool=pubmed}, author = {Lavagnino, Nicol{\'a}s and Serra, Fran{\c c}ois and Arbiza, Leonardo and Dopazo, Hern{\'a}n and Hasson, Esteban} } @article {950, title = {Identification of yeast genes that confer resistance to chitosan oligosaccharide (COS) using chemogenomics.}, journal = {BMC genomics}, volume = {13}, year = {2012}, month = {2012}, pages = {267}, abstract = {BACKGROUND: Chitosan oligosaccharide (COS), a deacetylated derivative of chitin, is an abundant, and renewable natural polymer. 
COS has higher antimicrobial properties than chitosan and is presumed to act by disrupting/permeabilizing the cell membranes of bacteria, yeast and fungi. COS is relatively non-toxic to mammals. By identifying the molecular and genetic targets of COS, we hope to gain a better understanding of the antifungal mode of action of COS. RESULTS: Three different chemogenomic fitness assays, haploinsufficiency (HIP), homozygous deletion (HOP), and multicopy suppression (MSP) profiling were combined with a transcriptomic analysis to gain insight in to the mode of action and mechanisms of resistance to chitosan oligosaccharides. The fitness assays identified 39 yeast deletion strains sensitive to COS and 21 suppressors of COS sensitivity. The genes identified are involved in processes such as RNA biology (transcription, translation and regulatory mechanisms), membrane functions (e.g. signalling, transport and targeting), membrane structural components, cell division, and proteasome processes. The transcriptomes of control wild type and 5 suppressor strains overexpressing ARL1, BCK2, ERG24, MSG5, or RBA50, were analyzed in the presence and absence of COS. Some of the up-regulated transcripts in the suppressor overexpressing strains exposed to COS included genes involved in transcription, cell cycle, stress response and the Ras signal transduction pathway. Down-regulated transcripts included those encoding protein folding components and respiratory chain proteins. The COS-induced transcriptional response is distinct from previously described environmental stress responses (i.e. thermal, salt, osmotic and oxidative stress) and pre-treatment with these well characterized environmental stressors provided little or any resistance to COS. CONCLUSIONS: Overexpression of the ARL1 gene, a member of the Ras superfamily that regulates membrane trafficking, provides protection against COS-induced cell membrane permeability and damage. 
We found that the ARL1 COS-resistant over-expression strain was as sensitive to Amphotericin B, Fluconazole and Terbinafine as the wild type cells and that when COS and Fluconazole are used in combination they act in a synergistic fashion. The gene targets of COS identified in this study indicate that COS{\textquoteright}s mechanism of action is different from other commonly studied fungicides that target membranes, suggesting that COS may be an effective fungicide for drug-resistant fungal pathogens.}, issn = {1471-2164}, doi = {10.1186/1471-2164-13-267}, author = {Jaime, Mar{\'\i}a D L A and Lopez-Llorca, Luis Vicente and Conesa, Ana and Lee, Anna Y and Proctor, Michael and Heisler, Lawrence E and Gebbia, Marinella and Giaever, Guri and Westwood, J Timothy and Nislow, Corey} } @article {939, title = {Select your SNPs (SYSNPs): a web tool for automatic and massive selection of SNPs.}, journal = {International journal of data mining and bioinformatics}, volume = {6}, year = {2012}, month = {2012}, pages = {324-34}, abstract = {Association studies are the choice approach in the discovery of the genomic basis of complex traits. To carry out such analysis, researchers frequently need to (1) select optimally informative sets of Single Nucleotide Polymorphisms (SNPs) in candidate regions and (2) annotate the results of associations found by means of genome-wide SNP arrays. These are complex tasks, since many criteria have to be considered, including the SNPs{\textquoteright} functional properties, technological information and haplotype frequencies in given populations. SYSNPs implements algorithms that allow for efficient and simultaneous consideration of all the relevant criteria to obtain sets of SNPs that properly cover arbitrarily large lists of genes or genomic regions. Complementarily, SYSNPs allows for comprehensive functional annotation of SNPs linked to any given marker SNP. 
SYSNPs dramatically reduces the effort needed for SNP selection from days of searching various databases to a few minutes using a simple browser.}, issn = {1748-5673}, url = {http://inderscience.metapress.com/content/f76740x8071u513n/}, author = {Lorente-Galdos, Bel{\'e}n and Medina, Ignacio and Morcillo-Suarez, Carlos and Heredia, Txema and Carre{\~n}o-Torres, Angel and Sangr{\'o}s, Ricardo and Alegre, Josep and Pita, Guillermo and Vellalta, Gemma and Malats, Nuria and Pisano, David G and Dopazo, Joaqu{\'\i}n and Navarro, Arcadi} } @article {949, title = {Transcriptome profiling of the intoxication response of Tenebrio molitor larvae to Bacillus thuringiensis Cry3Aa protoxin.}, journal = {PloS one}, volume = {7}, year = {2012}, month = {2012}, pages = {e34624}, abstract = {Bacillus thuringiensis (Bt) crystal (Cry) proteins are effective against a select number of insect pests, but improvements are needed to increase efficacy and decrease time to mortality for coleopteran pests. To gain insight into the Bt intoxication process in Coleoptera, we performed RNA-Seq on cDNA generated from the guts of Tenebrio molitor larvae that consumed either a control diet or a diet containing Cry3Aa protoxin. Approximately 134,090 and 124,287 sequence reads from the control and Cry3Aa-treated groups were assembled into 1,318 and 1,140 contigs, respectively. Enrichment analyses indicated that functions associated with mitochondrial respiration, signalling, maintenance of cell structure, membrane integrity, protein recycling/synthesis, and glycosyl hydrolases were significantly increased in Cry3Aa-treated larvae, whereas functions associated with many metabolic processes were reduced, especially glycolysis, tricarboxylic acid cycle, and fatty acid synthesis. Microarray analysis was used to evaluate temporal changes in gene expression after 6, 12 or 24 h of Cry3Aa exposure. 
Overall, microarray analysis indicated that transcripts related to allergens, chitin-binding proteins, glycosyl hydrolases, and tubulins were induced, and those related to immunity and metabolism were repressed in Cry3Aa-intoxicated larvae. The 24 h microarray data validated most of the RNA-Seq data. Of the three intoxication intervals, larvae demonstrated more differential expression of transcripts after 12 h exposure to Cry3Aa. Gene expression examined by three different methods in control vs. Cry3Aa-treated larvae at the 24 h time point indicated that transcripts encoding proteins with chitin-binding domain 3 were the most differentially expressed in Cry3Aa-intoxicated larvae. Overall, the data suggest that T. molitor larvae mount a complex response to Cry3Aa during the initial 24 h of intoxication. Data from this study represent the largest genetic sequence dataset for T. molitor to date. Furthermore, the methods in this study are useful for comparative analyses in organisms lacking a sequenced genome.}, keywords = {Administration, Animals, Bacterial Proteins, Base Sequence, Biosynthetic Pathways, Complementary, DNA, Endotoxins, Energy Metabolism, Gene Expression Profiling, Hemolysin Proteins, Larva, Microarray Analysis, Molecular Sequence Data, Oral, Sequence Analysis, Tenebrio, Time Factors, Transcriptome}, issn = {1932-6203}, doi = {10.1371/journal.pone.0034624}, author = {Oppert, Brenda and Dowd, Scot E and Bouffard, Pascal and Li, Lewyn and Conesa, Ana and Lorenzen, Marc{\'e} D and Toutges, Michelle and Marshall, Jeremy and Huestis, Diana L and Fabrick, Jeff and Oppert, Cris and Jurat-Fuentes, Juan Luis} } @article {900, title = {Using GPUs for the Exact Alignment of Short-read Genetic Sequences by Means of the Burrows{\textendash}Wheeler Transform.}, journal = {IEEE/ACM transactions on computational biology and bioinformatics / IEEE, ACM}, volume = {9}, number = {4}, year = {2012}, month = {2012 Mar 20}, pages = {1245-1256}, abstract = {General Purpose 
Graphic Processing Units (GPGPUs) constitute an inexpensive resource for computing-intensive applications that could exploit an intrinsic fine-grain parallelism. This paper presents the design and implementation in GPGPUs of an exact alignment tool for nucleotide sequences based on the Burrows-Wheeler Transform. We compare this algorithm with state-of-the-art implementations of the same algorithm over standard CPUs, and considering the same conditions in terms of I/O. Excluding disk transfers, the implementation of the algorithm in GPUs shows a speedup larger than 12x, when compared to CPU execution. This implementation exploits the parallelism by concurrently searching different sequences on the same reference search tree, maximising memory locality and ensuring a symmetric access to the data. The article describes the behaviour of the algorithm in GPU, showing a good scalability in the performance, only limited by the size of the GPU inner memory.}, keywords = {Burrows-Wheeler transform, CPU execution, GPGPU, NGS}, issn = {1557-9964}, doi = {10.1109/TCBB.2012.49}, url = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?reload=true\&arnumber=6175888}, author = {Salavert Torres, Jose and Blanquer Espert, Ignacio and Tomas Dominguez, Andres and Hern{\'a}ndez, Vicente and Medina, Ignacio and T{\'a}rraga, Joaqu{\'\i}n and Dopazo, Joaquin} } @article {524, title = {Whole-genome bisulfite DNA sequencing of a DNMT3B mutant patient.}, journal = {Epigenetics}, volume = {7}, year = {2012}, month = {2012 Jun 01}, pages = {542-50}, abstract = {

The immunodeficiency, centromere instability and facial anomalies (ICF) syndrome is associated to mutations of the DNA methyl-transferase DNMT3B, resulting in a reduction of enzyme activity. Aberrant expression of immune system genes and hypomethylation of pericentromeric regions accompanied by chromosomal instability were determined as alterations driving the disease phenotype. However, so far only technologies capable to analyze single loci were applied to determine epigenetic alterations in ICF patients. In the current study, we performed whole-genome bisulphite sequencing to assess alteration in DNA methylation at base pair resolution. Genome-wide we detected a decrease of methylation level of 42\%, with the most profound changes occurring in inactive heterochromatic regions, satellite repeats and transposons. Interestingly, transcriptional active loci and ribosomal RNA repeats escaped global hypomethylation. Despite a genome-wide loss of DNA methylation the epigenetic landscape and crucial regulatory structures were conserved. Remarkably, we revealed a mislocated activity of mutant DNMT3B to H3K4me1 loci resulting in hypermethylation of active promoters. Functionally, we could associate alterations in promoter methylation with the ICF syndrome immunodeficient phenotype by detecting changes in genes related to the B-cell receptor mediated maturation pathway.

}, keywords = {B-Lymphocytes, Cell Line, Transformed, Child, Preschool, DNA (Cytosine-5-)-Methyltransferases, DNA Methylation, Epigenesis, Genetic, Face, Female, Genome, Human, High-Throughput Nucleotide Sequencing, Humans, Immunologic Deficiency Syndromes, mutation, Primary Immunodeficiency Diseases, Sequence Analysis, DNA, Sulfites}, issn = {1559-2308}, doi = {10.4161/epi.20523}, author = {Heyn, Holger and Vidal, Enrique and Sayols, Sergi and Sanchez-Mut, Jose V and Moran, Sebastian and Medina, Ignacio and Sandoval, Juan and Sim{\'o}-Riudalbas, Laia and Szczesna, Karolina and Huertas, Dori and Gatto, Sole and Matarazzo, Maria R and Dopazo, Joaquin and Esteller, Manel} } @article {22039362, title = {Discovery of an ebolavirus-like filovirus in {Europe}.}, journal = {PLoS pathogens}, volume = {7}, year = {2011}, month = {2011 Oct}, pages = {e1002304}, abstract = {

Filoviruses, amongst the most lethal of primate pathogens, have only been reported as natural infections in sub-Saharan Africa and the Philippines. Infections of bats with the ebolaviruses and marburgviruses do not appear to be associated with disease. Here we report identification in dead insectivorous bats of a genetically distinct filovirus, provisionally named Lloviu virus, after the site of detection, Cueva del Lloviu, in Spain.

}, author = {Negredo, Ana and Palacios, Gustavo and V{\'a}zquez-Mor{\'o}n, Sonia and Gonz{\'a}lez, F{\'e}lix and Dopazo, Hern{\'a}n and Molero, Francisca and Juste, Javier and Quetglas, Juan and Savji, Nazir and de la Cruz Mart{\'\i}nez, Maria and Herrera, Jesus Enrique and Pizarro, Manuel and Hutchison, Stephen K and Echevarr{\'\i}a, Juan E and Lipkin, W Ian and Tenorio, Antonio} } @article {533, title = {Evidence for short-time divergence and long-time conservation of tissue-specific expression after gene duplication.}, journal = {Brief Bioinform}, volume = {12}, year = {2011}, month = {2011 Sep}, pages = {442-8}, abstract = {

Gene duplication is one of the main mechanisms by which genomes can acquire novel functions. It has been proposed that the retention of gene duplicates can be associated to processes of tissue expression divergence. These models predict that acquisition of divergent expression patterns should be acquired shortly after the duplication, and that larger divergence in tissue expression would be expected for paralogs, as compared to orthologs of a similar age. Many studies have shown that gene duplicates tend to have divergent expression patterns and that gene family expansions are associated with high levels of tissue specificity. However, the timeframe in which these processes occur have rarely been investigated in detail, particularly in vertebrates, and most analyses do not include direct comparisons of orthologs as a baseline for the expected levels of tissue specificity in absence of duplications. To assess the specific contribution of duplications to expression divergence, we combine here phylogenetic analyses and expression data from human and mouse. In particular, we study differences in spatial expression among human-mouse paralogs, specifically duplicated after the radiation of mammals, and compare them to pairs of orthologs in the same species. Our results show that gene duplication leads to increased levels of tissue specificity and that this tends to occur promptly after the duplication event.

}, keywords = {Animals, Conserved Sequence, Evolution, Molecular, Gene Duplication, gene expression, Genome, Humans, Mice, Organ Specificity}, issn = {1477-4054}, doi = {10.1093/bib/bbr022}, author = {Huerta-Cepas, Jaime and Dopazo, Joaquin and Huynen, Martijn A and Gabald{\'o}n, Toni} } @article {539, title = {Phylemon 2.0: a suite of web-tools for molecular evolution, phylogenetics, phylogenomics and hypotheses testing.}, journal = {Nucleic Acids Res}, volume = {39}, year = {2011}, month = {2011 Jul}, pages = {W470-4}, abstract = {

Phylemon 2.0 is a new release of the suite of web tools for molecular evolution, phylogenetics, phylogenomics and hypotheses testing. It has been designed as a response to the increasing demand of molecular sequence analyses for experts and non-expert users. Phylemon 2.0 has several unique features that differentiates it from other similar web resources: (i) it offers an integrated environment that enables evolutionary analyses, format conversion, file storage and edition of results; (ii) it suggests further analyses, thereby guiding the users through the web server; and (iii) it allows users to design and save phylogenetic pipelines to be used over multiple genes (phylogenomics). Altogether, Phylemon 2.0 integrates a suite of 30 tools covering sequence alignment reconstruction and trimming; tree reconstruction, visualization and manipulation; and evolutionary hypotheses testing.

}, keywords = {Evolution, Molecular, Genomics, Internet, Phylogeny, Sequence Alignment, Software}, issn = {1362-4962}, doi = {10.1093/nar/gkr408}, author = {S{\'a}nchez, Rub{\'e}n and Serra, Fran{\c c}ois and T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Carbonell, Jos{\'e} and Pulido, Luis and De Maria, Alejandro and Capella-Gut{\'\i}errez, Salvador and Huerta-Cepas, Jaime and Gabald{\'o}n, Toni and Dopazo, Joaquin and Dopazo, Hern{\'a}n} } @article {20028698, title = {Changes in the pattern of DNA methylation associate with twin discordance in systemic lupus erythematosus.}, journal = {Genome research}, volume = {20}, year = {2010}, month = {2010 Feb}, pages = {170-9}, abstract = {

Monozygotic (MZ) twins are partially concordant for most complex diseases, including autoimmune disorders. Whereas phenotypic concordance can be used to study heritability, discordance suggests the role of non-genetic factors. In autoimmune diseases, environmentally driven epigenetic changes are thought to contribute to their etiology. Here we report the first high-throughput and candidate sequence analyses of DNA methylation to investigate discordance for autoimmune disease in twins. We used a cohort of MZ twins discordant for three diseases whose clinical signs often overlap: systemic lupus erythematosus (SLE), rheumatoid arthritis, and dermatomyositis. Only MZ twins discordant for SLE featured widespread changes in the DNA methylation status of a significant number of genes. Gene ontology analysis revealed enrichment in categories associated with immune function. Individual analysis confirmed the existence of DNA methylation and expression changes in genes relevant to SLE pathogenesis. These changes occurred in parallel with a global decrease in the 5-methylcytosine content that was concomitantly accompanied with changes in DNA methylation and expression levels of ribosomal RNA genes, although no changes in repetitive sequences were found. Our findings not only identify potentially relevant DNA methylation markers for the clinical characterization of SLE patients but also support the notion that epigenetic changes may be critical in the clinical manifestations of autoimmune disease.

}, author = {Javierre, Biola M and Fernandez, Agustin F and Richter, Julia and Al-Shahrour, Fatima and Martin-Subero, J Ignacio and Rodriguez-Ubreva, Javier and Berdasco, Maria and Fraga, Mario F and O{\textquoteright}Hanlon, Terrance P and Rider, Lisa G and Jacinto, Filipe V and Lopez-Longo, F Javier and Dopazo, Joaquin and Forn, Marta and Peinado, Miguel A and Carre{\~n}o, Luis and Sawalha, Amr H and Harley, John B and Siebert, Reiner and Esteller, Manel and Miller, Frederick W and Ballestar, Esteban} } @article {548, title = {ETE: a python Environment for Tree Exploration.}, journal = {BMC Bioinformatics}, volume = {11}, year = {2010}, month = {2010 Jan 13}, pages = {24}, abstract = {

BACKGROUND: Many bioinformatics analyses, ranging from gene clustering to phylogenetics, produce hierarchical trees as their main result. These are used to represent the relationships among different biological entities, thus facilitating their analysis and interpretation. A number of standalone programs are available that focus on tree visualization or that perform specific analyses on them. However, such applications are rarely suitable for large-scale surveys, in which a higher level of automation is required. Currently, many genome-wide analyses rely on tree-like data representation and hence there is a growing need for scalable tools to handle tree structures at large scale.

RESULTS: Here we present the Environment for Tree Exploration (ETE), a python programming toolkit that assists in the automated manipulation, analysis and visualization of hierarchical trees. ETE libraries provide a broad set of tree handling options as well as specific methods to analyze phylogenetic and clustering trees. Among other features, ETE allows for the independent analysis of tree partitions, has support for the extended newick format, provides an integrated node annotation system and permits to link trees to external data such as multiple sequence alignments or numerical arrays. In addition, ETE implements a number of built-in analytical tools, including phylogeny-based orthology prediction and cluster validation techniques. Finally, ETE{\textquoteright}s programmable tree drawing engine can be used to automate the graphical rendering of trees with customized node-specific visualizations.

CONCLUSIONS: ETE provides a complete set of methods to manipulate tree data structures that extends current functionality in other bioinformatic toolkits of a more general purpose. ETE is free software and can be downloaded from http://ete.cgenomics.org.

}, keywords = {Computational Biology, Databases, Genetic, Phylogeny, Software}, issn = {1471-2105}, doi = {10.1186/1471-2105-11-24}, author = {Huerta-Cepas, Jaime and Dopazo, Joaquin and Gabald{\'o}n, Toni} } @article {20164864, title = {Fine-scale evolution: genomic, phenotypic and ecological differentiation in two coexisting Salinibacter ruber strains.}, journal = {The ISME journal}, year = {2010}, month = {2010 Feb 18}, abstract = {

Genomic and metagenomic data indicate a high degree of genomic variation within microbial populations, although the ecological and evolutive meaning of this microdiversity remains unknown. Microevolution analyses, including genomic and experimental approaches, are so far very scarce for non-pathogenic bacteria. In this study, we compare the genomes, metabolomes and selected ecological traits of the strains M8 and M31 of the hyperhalophilic bacterium Salinibacter ruber that contain ribosomal RNA (rRNA) gene and intergenic regions that are identical in sequence and were simultaneously isolated from a Mediterranean solar saltern. Comparative analyses indicate that S. ruber genomes present a mosaic structure with conserved and hypervariable regions (HVRs). The HVRs or genomic islands, are enriched in transposases, genes related to surface properties, strain-specific genes and highly divergent orthologous. However, the many indels outside the HVRs indicate that genome plasticity extends beyond them. Overall, 10\% of the genes encoded in the M8 genome are absent from M31 and could stem from recent acquisitions. S. ruber genomes also harbor 34 genes located outside HVRs that are transcribed during standard growth and probably derive from lateral gene transfers with Archaea preceding the M8/M31 divergence. Metabolomic analyses, phage susceptibility and competition experiments indicate that these genomic differences cannot be considered neutral from an ecological perspective. The results point to the avoidance of competition by micro-niche adaptation and response to viral predation as putative major forces that drive microevolution within these Salinibacter strains. In addition, this work highlights the extent of bacterial functional diversity and environmental adaptation, beyond the resolution of the 16S rRNA and internal transcribed spacers regions. The ISME Journal advance online publication, 18 February 2010; doi:10.1038/ismej.2010.6.

}, author = {Pe{\~n}a, Arantxa and Teeling, Hanno and Huerta-Cepas, Jaime and Santos, Fernando and Yarza, Pablo and Brito-Echeverr{\'\i}a, Jocelyn and Lucio, Marianna and Schmitt-Kopplin, Philippe and Meseguer, Inmaculada and Schenowitz, Chantal and Dossat, Carole and Barbe, Valerie and Dopazo, Joaqu{\'\i}n and Rossell{\'o}-Mora, Ramon and Sch{\"u}ler, Margarete and Gl{\"o}ckner, Frank Oliver and Amann, Rudolf and Gabald{\'o}n, Toni and Ant{\'o}n, Josefa} } @article {20676074, title = {The MicroArray Quality Control (MAQC)-II study of common practices for the development and validation of microarray-based predictive models.}, journal = {Nature biotechnology}, volume = {28}, year = {2010}, month = {2010 Aug}, pages = {827-38}, abstract = {

Gene expression data from microarrays are being applied to predict preclinical and clinical endpoints, but the reliability of these predictions has not been established. In the MAQC-II project, 36 independent teams analyzed six microarray data sets to generate predictive models for classifying a sample with respect to one of 13 endpoints indicative of lung or liver toxicity in rodents, or of breast cancer, multiple myeloma or neuroblastoma in humans. In total, {\textgreater}30,000 models were built using many combinations of analytical methods. The teams generated predictive models without knowing the biological meaning of some of the endpoints and, to mimic clinical reality, tested the models on data that had not been used for training. We found that model performance depended largely on the endpoint and team proficiency and that different approaches generated models of similar performance. The conclusions and recommendations from MAQC-II should be useful for regulatory agencies, study committees and independent investigators that evaluate methods for global gene expression analysis.

}, url = {http://www.nature.com/nbt/journal/v28/n8/full/nbt.1665.html}, author = {Shi, Leming and Campbell, Gregory and Jones, Wendell D and Campagne, Fabien and Wen, Zhining and Walker, Stephen J and Su, Zhenqiang and Chu, Tzu-Ming and Goodsaid, Federico M and Pusztai, Lajos and Shaughnessy, John D and Oberthuer, Andr{\'e} and Thomas, Russell S and Paules, Richard S and Fielden, Mark and Barlogie, Bart and Chen, Weijie and Du, Pan and Fischer, Matthias and Furlanello, Cesare and Gallas, Brandon D and Ge, Xijin and Megherbi, Dalila B and Symmans, W Fraser and Wang, May D and Zhang, John and Bitter, Hans and Brors, Benedikt and Bushel, Pierre R and Bylesjo, Max and Chen, Minjun and Cheng, Jie and Cheng, Jing and Chou, Jeff and Davison, Timothy S and Delorenzi, Mauro and Deng, Youping and Devanarayan, Viswanath and Dix, David J and Dopazo, Joaquin and Dorff, Kevin C and Elloumi, Fathi and Fan, Jianqing and Fan, Shicai and Fan, Xiaohui and Fang, Hong and Gonzaludo, Nina and Hess, Kenneth R and Hong, Huixiao and Huan, Jun and Irizarry, Rafael A and Judson, Richard and Juraeva, Dilafruz and Lababidi, Samir and Lambert, Christophe G and Li, Li and Li, Yanen and Li, Zhen and Lin, Simon M and Liu, Guozhen and Lobenhofer, Edward K and Luo, Jun and Luo, Wen and McCall, Matthew N and Nikolsky, Yuri and Pennello, Gene A and Perkins, Roger G and Philip, Reena and Popovici, Vlad and Price, Nathan D and Qian, Feng and Scherer, Andreas and Shi, Tieliu and Shi, Weiwei and Sung, Jaeyun and Thierry-Mieg, Danielle and Thierry-Mieg, Jean and Thodima, Venkata and Trygg, Johan and Vishnuvajjala, Lakshmi and Wang, Sue Jane and Wu, Jianping and Wu, Yichao and Xie, Qian and Yousef, Waleed A and Zhang, Liang and Zhang, Xuegong and Zhong, Sheng and Zhou, Yiming and Zhu, Sheng and Arasappan, Dhivya and Bao, Wenjun and Lucas, Anne Bergstrom and Berthold, Frank and Brennan, Richard J and Buness, Andreas and Catalano, Jennifer G and Chang, Chang and Chen, Rong and Cheng, Yiyu and Cui, Jian and 
Czika, Wendy and Demichelis, Francesca and Deng, Xutao and Dosymbekov, Damir and Eils, Roland and Feng, Yang and Fostel, Jennifer and Fulmer-Smentek, Stephanie and Fuscoe, James C and Gatto, Laurent and Ge, Weigong and Goldstein, Darlene R and Guo, Li and Halbert, Donald N and Han, Jing and Harris, Stephen C and Hatzis, Christos and Herman, Damir and Huang, Jianping and Jensen, Roderick V and Jiang, Rui and Johnson, Charles D and Jurman, Giuseppe and Kahlert, Yvonne and Khuder, Sadik A and Kohl, Matthias and Li, Jianying and Li, Li and Li, Menglong and Li, Quan-Zhen and Li, Shao and Li, Zhiguang and Liu, Jie and Liu, Ying and Liu, Zhichao and Meng, Lu and Madera, Manuel and Martinez-Murillo, Francisco and Medina, Ignacio and Meehan, Joseph and Miclaus, Kelci and Moffitt, Richard A and Montaner, David and Mukherjee, Piali and Mulligan, George J and Neville, Padraic and Nikolskaya, Tatiana and Ning, Baitang and Page, Grier P and Parker, Joel and Parry, R Mitchell and Peng, Xuejun and Peterson, Ron L and Phan, John H and Quanz, Brian and Ren, Yi and Riccadonna, Samantha and Roter, Alan H and Samuelson, Frank W and Schumacher, Martin M and Shambaugh, Joseph D and Shi, Qiang and Shippy, Richard and Si, Shengzhu and Smalter, Aaron and Sotiriou, Christos and Soukup, Mat and Staedtler, Frank and Steiner, Guido and Stokes, Todd H and Sun, Qinglan and Tan, Pei-Yi and Tang, Rong and Tezak, Zivana and Thorn, Brett and Tsyganova, Marina and Turpaz, Yaron and Vega, Silvia C and Visintainer, Roberto and von Frese, Juergen and Wang, Charles and Wang, Eric and Wang, Junwei and Wang, Wei and Westermann, Frank and Willey, James C and Woods, Matthew and Wu, Shujian and Xiao, Nianqing and Xu, Joshua and Xu, Lei and Yang, Lun and Zeng, Xiao and Zhang, Jialu and Zhang, Li and Zhang, Min and Zhao, Chen and Puri, Raj K and Scherf, Uwe and Tong, Weida and Wolfinger, Russell D} } @article {19906725, title = {SIMAP{\textendash}a comprehensive database of pre-calculated protein sequence 
similarities, domains, annotations and clusters.}, journal = {Nucleic acids research}, volume = {38}, year = {2010}, month = {2010 Jan}, pages = {D223-6}, abstract = {

The prediction of protein function as well as the reconstruction of evolutionary genesis employing sequence comparison at large is still the most powerful tool in sequence analysis. Due to the exponential growth of the number of known protein sequences and the subsequent quadratic growth of the similarity matrix, the computation of the Similarity Matrix of Proteins (SIMAP) becomes a computational intensive task. The SIMAP database provides a comprehensive and up-to-date pre-calculation of the protein sequence similarity matrix, sequence-based features and sequence clusters. As of September 2009, SIMAP covers 48 million proteins and more than 23 million non-redundant sequences. Novel features of SIMAP include the expansion of the sequence space by including databases such as ENSEMBL as well as the integration of metagenomes based on their consistent processing and annotation. Furthermore, protein function predictions by Blast2GO are pre-calculated for all sequences in SIMAP and the data access and query functions have been improved. SIMAP assists biologists to query the up-to-date sequence space systematically and facilitates large-scale downstream projects in computational biology. Access to SIMAP is freely provided through the web portal for individuals (http://mips.gsf.de/simap/) and for programmatic access through DAS (http://webclu.bio.wzw.tum.de/das/) and Web-Service (http://mips.gsf.de/webservices/services/SimapService2.0?wsdl).

}, author = {Rattei, Thomas and Tischler, Patrick and G{\"o}tz, Stefan and Jehl, Marc-Andr{\'e} and Hoser, Jonathan and Arnold, Roland and Conesa, Ana and Mewes, Hans-Werner} } @article { PubMed_19441879, title = {Modeling and managing experimental data using FuGE.}, journal = {OMICS}, volume = {13}, number = {3}, year = {2009}, pages = {239-51}, issn = {1557-8100}, author = {Jones, Andrew R and Lister, Allyson L and Hermida, Leandro and Wilkinson, Peter and Eisenacher, Martin and Belhajjame, Khalid and Gibson, Frank and Lord, Phil and Pocock, Matthew and Rosenfelder, Heiko and Santoyo-L{\'o}pez, Javier and Wipat, Anil and Paton, Norman W} } @article {763, title = {Pere Alberch: Originator of EvoDevo}, journal = {Biological Theory}, volume = {3}, number = {4}, year = {2009}, pages = {351-353}, author = {Reiss, J O and Burke, A C and Archer, C and De Renzi, M and Dopazo, H. and Etxeberria, A and Gale, E A and Hinchliffe, J R and Nu{\~n}o {de la} Rosa, L and Rose, C S and Rasskin-Gutman, D and M{\"u}ller, G} } @conference {585, title = {Peripheral blood cells transcriptome to study new biomarkers for myocardial infarction follow up}, year = {2009}, month = {06}, author = {Silbiger, Vivian and Luchessi, Andr{\'e} and Hirata, Rosario and Carracedo, {\'A}ngel and Bri{\'o}n, Maria and Lima Neto, Lidio and P. Pastorelli, C and Dopazo, Joaquin and Montaner, David and Garcia, F and P. Sampaio, M and P. Pereira, M and S. Santos, E and Armaganijan, Dikran and Hirata, Mario} } @article {17597009, title = {CLEAR-test: combining inference for differential expression and variability in microarray data analysis}, journal = {J Biomed Inform}, volume = {41}, number = {1}, year = {2008}, note = {

Valls, Joan Grau, Monica Sole, Xavier Hernandez, Pilar Montaner, David Dopazo, Joaquin Peinado, Miguel A Capella, Gabriel Moreno, Victor Pujana, Miguel Angel Comparative Study Research Support, Non-U.S. Gov{\textquoteright}t United States Journal of biomedical informatics J Biomed Inform. 2008 Feb;41(1):33-45. Epub 2007 May 17.

}, pages = {33-45}, abstract = {

A common goal of microarray experiments is to detect genes that are differentially expressed under distinct experimental conditions. Several statistical tests have been proposed to determine whether the observed changes in gene expression are significant. The t-test assigns a score to each gene on the basis of changes in its expression relative to its estimated variability, in such a way that genes with a higher score (in absolute values) are more likely to be significant. Most variants of the t-test use the complete set of genes to influence the variance estimate for each single gene. However, no inference is made in terms of the variability itself. Here, we highlight the problem of low observed variances in the t-test, when genes with relatively small changes are declared differentially expressed. Alternatively, the z-test could be used although, unlike the t-test, it can declare differentially expressed genes with high observed variances. To overcome this, we propose to combine the z-test, which focuses on large changes, with a chi(2) test to evaluate variability. We call this procedure CLEAR-test and we provide a combined p-value that offers a compromise between both aspects. Analysis of three publicly available microarray datasets reveals the greater performance of the CLEAR-test relative to the t-test and alternative methods. Finally, empirical and simulated data analyses demonstrate the greater reproducibility and statistical power of the CLEAR-test and z-test with respect to current alternative methods. In addition, the CLEAR-test improves the z-test by capturing reproducible genes with high variability.

}, keywords = {*Algorithms Artificial Intelligence *Data Interpretation, Statistical Gene Expression Profiling/*methods Gene Expression Regulation/*physiology Oligonucleotide Array Sequence Analysis/*methods Proteome/*metabolism Signal Transduction/*physiology}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17597009}, author = {Valls, J. and Grau, M. and Sole, X. and Hernandez, P. and Montaner, D. and Dopazo, J. and Peinado, M. A. and Capella, G. and Moreno, V. and Pujana, M. A.} } @article {18697870, title = {Controlled ovarian stimulation induces a functional genomic delay of the endometrium with potential clinical implications}, journal = {J Clin Endocrinol Metab}, volume = {93}, number = {11}, year = {2008}, note = {

Horcajadas, Jose A Minguez, Pablo Dopazo, Joaquin Esteban, Francisco J Dominguez, Francisco Giudice, Linda C Pellicer, Antonio Simon, Carlos Research Support, Non-U.S. Gov{\textquoteright}t United States The Journal of clinical endocrinology and metabolism J Clin Endocrinol Metab. 2008 Nov;93(11):4500-10. Epub 2008 Aug 12.

}, pages = {4500-10}, abstract = {

CONTEXT: Controlled ovarian stimulation induces morphological, biochemical, and functional genomic modifications of the human endometrium during the window of implantation. OBJECTIVE: Our objective was to compare the gene expression profile of the human endometrium in natural vs. controlled ovarian stimulation cycles throughout the early-mid secretory transition using microarray technology. METHOD: Microarray data from 49 endometrial biopsies obtained from LH+1 to LH+9 (n=25) in natural cycles and from human chorionic gonadotropin (hCG) +1 to hCG+9 in controlled ovarian stimulation cycles (n=24) were analyzed using different methods, such as clustering, profiling of biological processes, and selection of differentially expressed genes, as implemented in Gene Expression Pattern Analysis Suite and Babelomics programs. RESULTS: Endometria from natural cycles followed different genomic patterns compared with controlled ovarian stimulation cycles in the transition from the pre-receptive (days LH/hCG+1 until LH/hCG+5) to the receptive phase (day LH+7/hCG+7). Specifically, we have demonstrated the existence of a 2-d delay in the activation/repression of two clusters composed by 218 and 133 genes, respectively, on day hCG+7 vs. LH+7. Many of these delayed genes belong to the class window of implantation genes affecting basic biological processes in the receptive endometrium. CONCLUSIONS: These results demonstrate that gene expression profiling of the endometrium is different between natural and controlled ovarian stimulation cycles in the receptive phase. Identification of these differentially regulated genes can be used to understand the different developmental profiles of receptive endometrium during controlled ovarian stimulation and to search for the best controlled ovarian stimulation treatment in terms of minimal endometrial impact.

}, keywords = {Algorithms Chorionic Gonadotropin/genetics Endometrium/cytology/pathology/*physiology/physiopathology Female Gene Expression Regulation Genome, Human Glutathione Peroxidase/genetics Humans Insulin-Like Growth Factor Binding Proteins/genetics Luteal Phase/physiology Luteinizing Hormone/genetics Menstrual Cycle Oligonucleotide Array Sequence Analysis Ovulation Induction/*methods RNA/genetics/isola}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18697870}, author = {Horcajadas, J. A. and Minguez, P. and Dopazo, J. and Esteban, F. J. and Dominguez, F. and Giudice, L. C. and Pellicer, A. and Simon, C.} } @article {18508806, title = {GEPAS, a web-based tool for microarray data analysis and interpretation}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, note = {

Tarraga, Joaquin Medina, Ignacio Carbonell, Jose Huerta-Cepas, Jaime Minguez, Pablo Alloza, Eva Al-Shahrour, Fatima Vegas-Azcarate, Susana Goetz, Stefan Escobar, Pablo Garcia-Garcia, Francisco Conesa, Ana Montaner, David Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2008 Jul 1;36(Web Server issue):W308-14. Epub 2008 May 28.

}, pages = {W308-14}, abstract = {

Gene Expression Profile Analysis Suite (GEPAS) is one of the most complete and extensively used web-based packages for microarray data analysis. During its more than 5 years of activity it has continuously been updated to keep pace with the state-of-the-art in the changing microarray data analysis arena. GEPAS offers diverse analysis options that include well established as well as novel algorithms for normalization, gene selection, class prediction, clustering and functional profiling of the experiment. New options for time-course (or dose-response) experiments, microarray-based class prediction, new clustering methods and new tests for differential expression have been included. The new pipeliner module allows automating the execution of sequential analysis steps by means of a simple but powerful graphic interface. An extensive re-engineering of GEPAS has been carried out which includes the use of web services and Web 2.0 technology features, a new user interface with persistent sessions and a new extended database of gene identifiers. GEPAS is nowadays the most quoted web tool in its field and it is extensively used by researchers of many countries and its records indicate an average usage rate of 500 experiments per day. GEPAS, is available at http://www.gepas.org.

}, keywords = {gepas, microarray data analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18508806}, author = {Tarraga, J. and Medina, Ignacio and Carbonell, J. and Huerta-Cepas, J. and Minguez, P. and Alloza, E. and Al-Shahrour, Fatima and Vegas-Azcarate, S. and Goetz, S. and Escobar, P. and Garcia-Garcia, F. and Conesa, A. and Montaner, D. and Dopazo, J.} } @article {593, title = {GEPAS, a web-based tool for microarray data analysis and interpretation.}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, month = {2008 Jul 01}, pages = {W308-14}, abstract = {

Gene Expression Profile Analysis Suite (GEPAS) is one of the most complete and extensively used web-based packages for microarray data analysis. During its more than 5 years of activity it has continuously been updated to keep pace with the state-of-the-art in the changing microarray data analysis arena. GEPAS offers diverse analysis options that include well established as well as novel algorithms for normalization, gene selection, class prediction, clustering and functional profiling of the experiment. New options for time-course (or dose-response) experiments, microarray-based class prediction, new clustering methods and new tests for differential expression have been included. The new pipeliner module allows automating the execution of sequential analysis steps by means of a simple but powerful graphic interface. An extensive re-engineering of GEPAS has been carried out which includes the use of web services and Web 2.0 technology features, a new user interface with persistent sessions and a new extended database of gene identifiers. GEPAS is nowadays the most quoted web tool in its field and it is extensively used by researchers of many countries and its records indicate an average usage rate of 500 experiments per day. GEPAS, is available at http://www.gepas.org.

}, keywords = {Computer Graphics, Dose-Response Relationship, Drug, Gene Expression Profiling, Internet, Kinetics, Oligonucleotide Array Sequence Analysis, Software}, issn = {1362-4962}, doi = {10.1093/nar/gkn303}, author = {T{\'a}rraga, Joaqu{\'\i}n and Medina, Ignacio and Carbonell, Jos{\'e} and Huerta-Cepas, Jaime and Minguez, Pablo and Alloza, Eva and Al-Shahrour, F{\'a}tima and Vegas-Azc{\'a}rate, Susana and Goetz, Stefan and Escobar, Pablo and Garcia-Garcia, Francisco and Conesa, Ana and Montaner, David and Dopazo, Joaquin} } @article {595, title = {Interoperability with Moby 1.0--it{\textquoteright}s better than sharing your toothbrush!}, journal = {Brief Bioinform}, volume = {9}, year = {2008}, month = {2008 May}, pages = {220-31}, abstract = {

The BioMoby project was initiated in 2001 from within the model organism database community. It aimed to standardize methodologies to facilitate information exchange and access to analytical resources, using a consensus driven approach. Six years later, the BioMoby development community is pleased to announce the release of the 1.0 version of the interoperability framework, registry Application Programming Interface and supporting Perl and Java code-bases. Together, these provide interoperable access to over 1400 bioinformatics resources worldwide through the BioMoby platform, and this number continues to grow. Here we highlight and discuss the features of BioMoby that make it distinct from other Semantic Web Service and interoperability initiatives, and that have been instrumental to its deployment and use by a wide community of bioinformatics service providers. The standard, client software, and supporting code libraries are all freely available at http://www.biomoby.org/.

}, keywords = {Computational Biology, Database Management Systems, Databases, Factual, Information Storage and Retrieval, Internet, Programming Languages, Systems Integration}, issn = {1477-4054}, doi = {10.1093/bib/bbn003}, author = {Wilkinson, Mark D and Senger, Martin and Kawas, Edward and Bruskiewich, Richard and Gouzy, Jerome and Noirot, Celine and Bardou, Philippe and Ng, Ambrose and Haase, Dirk and Saiz, Enrique de Andres and Wang, Dennis and Gibbons, Frank and Gordon, Paul M K and Sensen, Christoph W and Carrasco, Jose Manuel Rodriguez and Fern{\'a}ndez, Jos{\'e} M and Shen, Lixin and Links, Matthew and Ng, Michael and Opushneva, Nina and Neerincx, Pieter B T and Leunissen, Jack A M and Ernst, Rebecca and Twigger, Simon and Usadel, Bjorn and Good, Benjamin and Wong, Yan and Stein, Lincoln and Crosby, William and Karlsson, Johan and Royo, Romina and P{\'a}rraga, Iv{\'a}n and Ram{\'\i}rez, Sergio and Gelpi, Josep Lluis and Trelles, Oswaldo and Pisano, David G and Jimenez, Natalia and Kerhornou, Arnaud and Rosset, Roman and Zamacola, Leire and T{\'a}rraga, Joaqu{\'\i}n and Huerta-Cepas, Jaime and Carazo, Jose Mar{\'\i}a and Dopazo, Joaquin and Guig{\'o}, Roderic and Navarro, Arcadi and Orozco, Modesto and Valencia, Alfonso and Claros, M Gonzalo and P{\'e}rez, Antonio J and Aldana, Jose and Rojano, M Mar and Fernandez-Santa Cruz, Raul and Navas, Ismael and Schiltz, Gary and Farmer, Andrew and Gessler, Damian and Schoof, Heiko and Groscurth, Andreas} } @article {18238804, title = {Interoperability with Moby 1.0{\textendash}it{\textquoteright}s better than sharing your toothbrush!}, journal = {Brief Bioinform}, volume = {9}, number = {3}, year = {2008}, note = {

BioMoby Consortium Wilkinson, Mark D Senger, Martin Kawas, Edward Bruskiewich, Richard Gouzy, Jerome Noirot, Celine Bardou, Philippe Ng, Ambrose Haase, Dirk Saiz, Enrique de Andres Wang, Dennis Gibbons, Frank Gordon, Paul M K Sensen, Christoph W Carrasco, Jose Manuel Rodriguez Fernandez, Jose M Shen, Lixin Links, Matthew Ng, Michael Opushneva, Nina Neerincx, Pieter B T Leunissen, Jack A M Ernst, Rebecca Twigger, Simon Usadel, Bjorn Good, Benjamin Wong, Yan Stein, Lincoln Crosby, William Karlsson, Johan Royo, Romina Parraga, Ivan Ramirez, Sergio Gelpi, Josep Lluis Trelles, Oswaldo Pisano, David G Jimenez, Natalia Kerhornou, Arnaud Rosset, Roman Zamacola, Leire Tarraga, Joaquin Huerta-Cepas, Jaime Carazo, Jose Maria Dopazo, Joaquin Guigo, Roderic Navarro, Arcadi Orozco, Modesto Valencia, Alfonso Claros, M Gonzalo Perez, Antonio J Aldana, Jose Rojano, M Mar Fernandez-Santa Cruz, Raul Navas, Ismael Schiltz, Gary Farmer, Andrew Gessler, Damian Schoof, Heiko Groscurth, Andreas Research Support, Non-U.S. Gov{\textquoteright}t Review England Briefings in bioinformatics Brief Bioinform. 2008 May;9(3):220-31. Epub 2008 Jan 31.

}, pages = {220-31}, abstract = {

The BioMoby project was initiated in 2001 from within the model organism database community. It aimed to standardize methodologies to facilitate information exchange and access to analytical resources, using a consensus driven approach. Six years later, the BioMoby development community is pleased to announce the release of the 1.0 version of the interoperability framework, registry Application Programming Interface and supporting Perl and Java code-bases. Together, these provide interoperable access to over 1400 bioinformatics resources worldwide through the BioMoby platform, and this number continues to grow. Here we highlight and discuss the features of BioMoby that make it distinct from other Semantic Web Service and interoperability initiatives, and that have been instrumental to its deployment and use by a wide community of bioinformatics service providers. The standard, client software, and supporting code libraries are all freely available at http://www.biomoby.org/.

}, keywords = {Computational Biology/*methods *Database Management Systems *Databases, Factual Information Storage and Retrieval/*methods *Internet *Programming Languages Systems Integration}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18238804}, author = {Wilkinson, M. D. and Senger, M. and Kawas, E. and Bruskiewich, R. and Gouzy, J. and Noirot, C. and Bardou, P. and Ng, A. and Haase, D. and Saiz, E. de A. and Wang, D. and Gibbons, F. and Gordon, P. M. and Sensen, C. W. and Carrasco, J. M. and Fernandez, J. M. and Shen, L. and Links, M. and Ng, M. and Opushneva, N. and Neerincx, P. B. and Leunissen, J. A. and Ernst, R. and Twigger, S. and Usadel, B. and Good, B. and Wong, Y. and Stein, L. and Crosby, W. and Karlsson, J. and Royo, R. and Parraga, I. and Ramirez, S. and Gelpi, J. L. and Trelles, O. and Pisano, D. G. and Jimenez, N. and Kerhornou, A. and Rosset, R. and Zamacola, L. and Tarraga, J. and Huerta-Cepas, J. and Carazo, J. M. and Dopazo, J. and Guigo, R. and Navarro, A. and Orozco, M. and Valencia, A. and Claros, M. G. and Perez, A. J. and Aldana, J. and Rojano, M. M. and Fernandez-Santa Cruz, R. and Navas, I. and Schiltz, G. and Farmer, A. and Gessler, D. and Schoof, H. and Groscurth, A.} } @article {598, title = {PhylomeDB: a database for genome-wide collections of gene phylogenies.}, journal = {Nucleic Acids Res}, volume = {36}, year = {2008}, month = {2008 Jan}, pages = {D491-6}, abstract = {

The complete collection of evolutionary histories of all genes in a genome, also known as phylome, constitutes a valuable source of information. The reconstruction of phylomes has been previously prevented by large demands of time and computer power, but is now feasible thanks to recent developments in computers and algorithms. To provide a publicly available repository of complete phylomes that allows researchers to access and store large-scale phylogenomic analyses, we have developed PhylomeDB. PhylomeDB is a database of complete phylomes derived for different genomes within a specific taxonomic range. All phylomes in the database are built using a high-quality phylogenetic pipeline that includes evolutionary model testing and alignment trimming phases. For each genome, PhylomeDB provides the alignments, phylogentic trees and tree-based orthology predictions for every single encoded protein. The current version of PhylomeDB includes the phylomes of Human, the yeast Saccharomyces cerevisiae and the bacterium Escherichia coli, comprising a total of 32 289 seed sequences with their corresponding alignments and 172 324 phylogenetic trees. PhylomeDB can be publicly accessed at http://phylomedb.bioinfo.cipf.es.

}, keywords = {Base Sequence, Escherichia coli, Genes, Genomics, History, Ancient, Humans, Phylogeny, Proteins, Saccharomyces cerevisiae, Sequence Alignment}, issn = {1362-4962}, doi = {10.1093/nar/gkm899}, author = {Huerta-Cepas, Jaime and Bueno, Anibal and Dopazo, Joaquin and Gabald{\'o}n, Toni} }

% PubMed import, PMID 17962297. Same article as the record just above
% (identical abstract and author list), so the DOI is carried over from it.
% MeSH keywords are separated by semicolons because several headings
% contain a comma themselves (e.g. "History, Ancient").
@article {17962297,
  title = {{PhylomeDB}: a database for genome-wide collections of gene phylogenies},
  journal = {Nucleic Acids Res},
  volume = {36},
  number = {Database issue},
  year = {2008},
  note = {Huerta-Cepas, Jaime Bueno, Anibal Dopazo, Joaquin Gabaldon, Toni Historical Article Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2008 Jan;36(Database issue):D491-6. Epub 2007 Oct 25.},
  pages = {D491--D496},
  abstract = {The complete collection of evolutionary histories of all genes in a genome, also known as phylome, constitutes a valuable source of information. The reconstruction of phylomes has been previously prevented by large demands of time and computer power, but is now feasible thanks to recent developments in computers and algorithms. To provide a publicly available repository of complete phylomes that allows researchers to access and store large-scale phylogenomic analyses, we have developed PhylomeDB. PhylomeDB is a database of complete phylomes derived for different genomes within a specific taxonomic range. All phylomes in the database are built using a high-quality phylogenetic pipeline that includes evolutionary model testing and alignment trimming phases. For each genome, PhylomeDB provides the alignments, phylogenetic trees and tree-based orthology predictions for every single encoded protein. The current version of PhylomeDB includes the phylomes of Human, the yeast Saccharomyces cerevisiae and the bacterium Escherichia coli, comprising a total of 32 289 seed sequences with their corresponding alignments and 172 324 phylogenetic trees. PhylomeDB can be publicly accessed at http://phylomedb.bioinfo.cipf.es.},
  keywords = {Base Sequence; Escherichia coli/classification/genetics; Genes; *Genomics; History, Ancient; Humans; *Phylogeny; Proteins/classification/genetics; Saccharomyces cerevisiae/classification/genetics; Sequence Alignment},
  doi = {10.1093/nar/gkm899},
  url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17962297},
  author = {Huerta-Cepas, Jaime and Bueno, Anibal and Dopazo, Joaquin and Gabald{\'o}n, Toni}
}
@article {599, title = {SNP and haplotype mapping for genetic analysis in the rat.}, journal = {Nat Genet}, volume = {40}, year = {2008}, month = {2008 May}, pages = {560-6}, abstract = {

The laboratory rat is one of the most extensively studied model organisms. Inbred laboratory rat strains originated from limited Rattus norvegicus founder populations, and the inherited genetic variation provides an excellent resource for the correlation of genotype to phenotype. Here, we report a survey of genetic variation based on almost 3 million newly identified SNPs. We obtained accurate and complete genotypes for a subset of 20,238 SNPs across 167 distinct inbred rat strains, two rat recombinant inbred panels and an F2 intercross. Using 81\% of these SNPs, we constructed high-density genetic maps, creating a large dataset of fully characterized SNPs for disease gene mapping. Our data characterize the population structure and illustrate the degree of linkage disequilibrium. We provide a detailed SNP map and demonstrate its utility for mapping of quantitative trait loci. This community resource is openly available and augments the genetic tools for this workhorse of physiological studies.

}, keywords = {Animals, Chromosome Mapping, Databases, Genetic, Genome, Haplotypes, Linkage Disequilibrium, Phylogeny, Polymorphism, Single Nucleotide, Quantitative Trait Loci, Rats, Rats, Inbred Strains, Recombination, Genetic}, issn = {1546-1718}, doi = {10.1038/ng.124}, author = {Saar, Kathrin and Beck, Alfred and Bihoreau, Marie-Th{\'e}r{\`e}se and Birney, Ewan and Brocklebank, Denise and Chen, Yuan and Cuppen, Edwin and Demonchy, Stephanie and Dopazo, Joaquin and Flicek, Paul and Foglio, Mario and Fujiyama, Asao and Gut, Ivo G and Gauguier, Dominique and Guig{\'o}, Roderic and Guryev, Victor and Heinig, Matthias and Hummel, Oliver and Jahn, Niels and Klages, Sven and Kren, Vladimir and Kube, Michael and Kuhl, Heiner and Kuramoto, Takashi and Kuroki, Yoko and Lechner, Doris and Lee, Young-Ae and Lopez-Bigas, Nuria and Lathrop, G Mark and Mashimo, Tomoji and Medina, Ignacio and Mott, Richard and Patone, Giannino and Perrier-Cornet, Jeanne-Antide and Platzer, Matthias and Pravenec, Michal and Reinhardt, Richard and Sakaki, Yoshiyuki and Schilhabel, Markus and Schulz, Herbert and Serikawa, Tadao and Shikhagaie, Medya and Tatsumoto, Shouji and Taudien, Stefan and Toyoda, Atsushi and Voigt, Birger and Zelenika, Diana and Zimdahl, Heike and Hubner, Norbert} } @article {18443594, title = {SNP and haplotype mapping for genetic analysis in the rat}, journal = {Nat Genet}, volume = {40}, number = {5}, year = {2008}, note = {

STAR Consortium Saar, Kathrin Beck, Alfred Bihoreau, Marie-Therese Birney, Ewan Brocklebank, Denise Chen, Yuan Cuppen, Edwin Demonchy, Stephanie Dopazo, Joaquin Flicek, Paul Foglio, Mario Fujiyama, Asao Gut, Ivo G Gauguier, Dominique Guigo, Roderic Guryev, Victor Heinig, Matthias Hummel, Oliver Jahn, Niels Klages, Sven Kren, Vladimir Kube, Michael Kuhl, Heiner Kuramoto, Takashi Kuroki, Yoko Lechner, Doris Lee, Young-Ae Lopez-Bigas, Nuria Lathrop, G Mark Mashimo, Tomoji Medina, Ignacio Mott, Richard Patone, Giannino Perrier-Cornet, Jeanne-Antide Platzer, Matthias Pravenec, Michal Reinhardt, Richard Sakaki, Yoshiyuki Schilhabel, Markus Schulz, Herbert Serikawa, Tadao Shikhagaie, Medya Tatsumoto, Shouji Taudien, Stefan Toyoda, Atsushi Voigt, Birger Zelenika, Diana Zimdahl, Heike Hubner, Norbert 057733/Z/99/A/Wellcome Trust/United Kingdom 066780/Z/01/Z/Wellcome Trust/United Kingdom Research Support, Non-U.S. Gov{\textquoteright}t Technical Report United States Nature genetics Nat Genet. 2008 May;40(5):560-6.

}, pages = {560-6}, abstract = {

The laboratory rat is one of the most extensively studied model organisms. Inbred laboratory rat strains originated from limited Rattus norvegicus founder populations, and the inherited genetic variation provides an excellent resource for the correlation of genotype to phenotype. Here, we report a survey of genetic variation based on almost 3 million newly identified SNPs. We obtained accurate and complete genotypes for a subset of 20,238 SNPs across 167 distinct inbred rat strains, two rat recombinant inbred panels and an F2 intercross. Using 81\% of these SNPs, we constructed high-density genetic maps, creating a large dataset of fully characterized SNPs for disease gene mapping. Our data characterize the population structure and illustrate the degree of linkage disequilibrium. We provide a detailed SNP map and demonstrate its utility for mapping of quantitative trait loci. This community resource is openly available and augments the genetic tools for this workhorse of physiological studies.

}, keywords = {Animals Chromosome Mapping *Databases, Genetic, Genetic Genome *Haplotypes Linkage Disequilibrium Phylogeny *Polymorphism, Inbred Strains/*genetics Recombination, Single Nucleotide *Quantitative Trait Loci Rats/*genetics Rats}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18443594}, author = {K. Saar and A. Beck and M. T. Bihoreau and E. Birney and D. Brocklebank and Y. Chen and E. Cuppen and S. Demonchy and Dopazo, J. and P. Flicek and M. Foglio and A. Fujiyama and I. G. Gut and D. Gauguier and R. Guigo and V. Guryev and M. Heinig and O. Hummel and N. Jahn and S. Klages and V. Kren and M. Kube and H. Kuhl and Kuramoto, T. and Kuroki, Y. and Lechner, D. and Lee, Y. A. and Lopez-Bigas, N. and Lathrop, G. M. and Mashimo, T. and Medina, Ignacio and Mott, R. and Patone, G. and Perrier-Cornet, J. A. and Platzer, M. and Pravenec, M. and Reinhardt, R. and Sakaki, Y. and Schilhabel, M. and Schulz, H. and Serikawa, T. and Shikhagaie, M. and Tatsumoto, S. and Taudien, S. and Toyoda, A. and Voigt, B. and Zelenika, D. and Zimdahl, H. and Hubner, N.} } @article {18552980, title = {Time course profiling of the retinal transcriptome after optic nerve transection and optic nerve crush}, journal = {Mol Vis}, volume = {14}, year = {2008}, note = {Agudo, Marta Perez-Marin, Maria Cruz Lonngren, Ulrika Sobrado, Paloma Conesa, Ana Canovas, Isabel Salinas-Navarro, Manuel Miralles-Imperial, Jaime Hallbook, Finn Vidal-Sanz, Manuel Research Support, Non-U.S. Gov{\textquoteright}t United States Molecular vision Mol Vis. 2008 Jun 3;14:1050-63.}, pages = {1050-63}, abstract = {PURPOSE: A time-course analysis of gene regulation in the adult rat retina after intraorbital nerve crush (IONC) and intraorbital nerve transection (IONT). METHODS: RNA was extracted from adult rat retinas undergoing either IONT or IONC at increasing times post-lesion. Affymetrix RAE230.2 arrays were hybridized and analyzed. 
Statistically regulated genes were annotated and functionally clustered. Arrays were validated by means of quantitative reverse transcription polymerase chain reaction (qRT-PCR) on ten regulated genes at two times post-lesion. Western blotting and immunohistofluorescence for four pro-apoptotic proteins were performed on naive and injured retinas. Finally, custom signaling maps for IONT- and IONC-induced death response were generated (MetaCore, Genego Inc.). RESULTS: Here we show that over time, 3,219 sequences were regulated after IONT and 1,996 after IONC. Out of the total of regulated sequences, 1,078 were commonly regulated by both injuries. Interestingly, while IONT mainly triggers a gene upregulation-sustained over time, IONC causes a transitory downregulation. Functional clustering identified the regulation of high interest biologic processes, most importantly cell death wherein apoptosis was the most significant cluster. Ten death-related genes upregulated by both injuries were used for array validation by means of qRT-PCR. In addition, western blotting and immunohistofluorescence of total and active Caspase 3 (Casp3), tumor necrosis factor receptor type 1 associated death domain (TRADD), tumor necrosis factor receptor superfamily member 1a (TNFR1a), and c-fos were performed to confirm their protein regulation and expression pattern in naive and injured retinas. These analyses demonstrated that for these genes, protein regulation followed transcriptional regulation and that these pro-apoptotic proteins were expressed by retinal ganglion cells (RGCs). MetaCore-based death-signaling maps show that several apoptotic cascades were regulated in the retina following optic nerve injury and highlight the similarities and differences between IONT and IONC in cell death profiling. 
CONCLUSIONS: This comprehensive time course retinal transcriptome study comparing IONT and IONC lesions provides a unique valuable tool to understand the molecular mechanisms underlying optic nerve injury and to design neuroprotective protocols.}, keywords = {Animals Cell Death Cluster Analysis Female *Gene Expression Profiling Gene Expression Regulation *Nerve Crush Optic Nerve/*metabolism/*pathology Optic Nerve Injuries/*genetics Rats Rats, Sprague-Dawley Reproducibility of Results Retina/*metabolism/*pathology Time Factors}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18552980}, author = {Agudo, M. and Perez-Marin, M. C. and Lonngren, U. and Sobrado, P. and A. Conesa and Canovas, I. and Salinas-Navarro, M. and Miralles-Imperial, J. and Hallbook, F. and Vidal-Sanz, M.} } @article {18848557, title = {Transcriptional profiling of mRNA expression in the mouse distal colon}, journal = {Gastroenterology}, volume = {135}, number = {6}, year = {2008}, note = {Hoogerwerf, Willemijntje A Sinha, Mala Conesa, Ana Luxon, Bruce A Shahinian, Vahakn B Cornelissen, Germaine Halberg, Franz Bostwick, Jonathon Timm, John Cassone, Vincent M R21 DK074477-01A1/DK/NIDDK NIH HHS/United States Comparative Study Research Support, N.I.H., Extramural United States Gastroenterology Gastroenterology. 2008 Dec;135(6):2019-29. Epub 2008 Sep 3.}, pages = {2019-29}, abstract = {BACKGROUND \& AIMS: Intestinal epithelial cells and the myenteric plexus of the mouse gastrointestinal tract contain a circadian clock-based intrinsic time-keeping system. Because disruption of the biological clock has been associated with increased susceptibility to colon cancer and gastrointestinal symptoms, we aimed to identify rhythmically expressed genes in the mouse distal colon. METHODS: Microarray analysis was used to identify genes that were rhythmically expressed over a 24-hour light/dark cycle. 
The transcripts were then classified according to expression pattern, function, and association with physiologic and pathophysiologic processes of the colon. RESULTS: A circadian gene expression pattern was detected in approximately 3.7\% of distal colonic genes. A large percentage of these genes were involved in cell signaling, differentiation, and proliferation and cell death. Of all the rhythmically expressed genes in the mouse colon, approximately 7\% (64/906) have been associated with colorectal cancer formation (eg, B-cell leukemia/lymphoma-2 [Bcl2]) and 1.8\% (18/906) with various colonic functions such as motility and secretion (eg, vasoactive intestinal polypeptide, cystic fibrosis transmembrane conductance regulator). CONCLUSIONS: A subset of genes in the murine colon follows a rhythmic expression pattern. These findings may have significant implications for colonic physiology and pathophysiology.}, keywords = {Animals Blotting, Genetic, Inbred C57BL Microarray Analysis Proteins/*genetics/metabolism RNA, Messenger/biosynthesis/*genetics Reverse Transcriptase Polymerase Chain Reaction *Transcription, Western Cell Proliferation Circadian Rhythm/*genetics Colon/cytology/*metabolism Male Mice Mice}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18848557}, author = {Hoogerwerf, W. A. and Sinha, M. and A. Conesa and Luxon, B. A. and Shahinian, V. B. and Cornelissen, G. and Halberg, F. and Bostwick, J. and Timm, J. and Cassone, V. 
M.} }

% PubMed import, PMID 18539377. Author names follow the note field, which
% lists "Ommen, Ben van" (surname particle "van").
% NOTE(review): the keywords field is scrambled and cut off mid-word
% ("Propyl Gallate/toxi") in this export; it is kept verbatim here and
% should be refreshed from the PubMed record.
@article {18539377,
  title = {Transcriptome analysis provides new insights into liver changes induced in the rat upon dietary administration of the food additives butylated hydroxytoluene, curcumin, propyl gallate and thiabendazole},
  journal = {Food Chem Toxicol},
  volume = {46},
  number = {8},
  year = {2008},
  note = {Stierum, Rob Conesa, Ana Heijne, Wilbert Ommen, Ben van Junker, Karin Scott, Mary P Price, Roger J Meredith, Clive Lake, Brian G Groten, John Research Support, Non-U.S. Gov{\textquoteright}t England Food and chemical toxicology : an international journal published for the British Industrial Biological Research Association Food Chem Toxicol. 2008 Aug;46(8):2616-28. Epub 2008 Apr 25.},
  pages = {2616--2628},
  abstract = {Transcriptomics was performed to gain insight into mechanisms of food additives butylated hydroxytoluene (BHT), curcumin (CC), propyl gallate (PG), and thiabendazole (TB), additives for which interactions in the liver can not be excluded. Additives were administered in diets for 28 days to Sprague-Dawley rats and cDNA microarray experiments were performed on hepatic RNA. BHT induced changes in the expression of 10 genes, including phase I (CYP2B1/2; CYP3A9; CYP2C6) and phase II metabolism (GST mu2). The CYP2B1/2 and GST expression findings were confirmed by real time RT-PCR, western blotting, and increased GST activity towards DCNB. CC altered the expression of 12 genes. Three out of these were related to peroxisomes (phytanoyl-CoA dioxygenase, enoyl-CoA hydratase; CYP4A3). Increased cyanide insensitive palmitoyl-CoA oxidation was observed, suggesting that CC is a weak peroxisome proliferator. TB changed the expression of 12 genes, including CYP1A2. In line, CYP1A2 protein expression was increased. The expression level of five genes, associated with p53 was found to change upon TB treatment, including p53 itself, GADD45alpha, DN-7, protein kinase C beta and serum albumin. These array experiments led to the novel finding that TB is capable of inducing p53 at the protein level, at least at the highest dose levels employed above the current NOAEL. The expression of eight genes changed upon PG administration. This study shows the value of gene expression profiling in food toxicology in terms of generating novel hypotheses on the mechanisms of action of food additives in relation to pathology.},
  keywords = {Animals Aryl Hydrocarbon Hydroxylases/metabolism Body Weight/drug effects Butylated Hydroxytoluene/toxicity Curcumin/toxicity Cytochrome P-450 CYP1A2/metabolism Cytochrome P-450 CYP2B1/metabolism DNA, Complementary/biosynthesis/genetics Data Interpretation, Sprague-Dawley Reverse Transcriptase Polymerase Chain Reaction Steroid Hydroxylases/metabolism Thiabendazole/toxicity, Statistical *Diet Food Additives/*toxicity Gene Expression/drug effects *Gene Expression Profiling Glutathione Transferase/metabolism Liver/*drug effects Male Organ Size/drug effects Oxidation-Reduction Palmitoyl Coenzyme A/metabolism Propyl Gallate/toxi},
  url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18539377},
  author = {Stierum, Rob and Conesa, Ana and Heijne, Wilbert and van Ommen, Ben and Junker, Karin and Scott, Mary P and Price, Roger J and Meredith, Clive and Lake, Brian G and Groten, John}
}
@article {17519250, title = {Discovering gene expression patterns in time course microarray experiments by ANOVA-SCA}, journal = {Bioinformatics}, volume = {23}, number = {14}, year = {2007}, note = {Nueda, Maria Jose Conesa, Ana Westerhuis, Johan A Hoefsloot, Huub C J Smilde, Age K Talon, Manuel Ferrer, Alberto Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2007 Jul 15;23(14):1792-800. 
Epub 2007 May 22.}, pages = {1792-800}, abstract = {MOTIVATION: Designed microarray experiments are used to investigate the effects that controlled experimental factors have on gene expression and learn about the transcriptional responses associated with external variables. In these datasets, signals of interest coexist with varying sources of unwanted noise in a framework of (co)relation among the measured variables and with the different levels of the studied factors. Discovering experimentally relevant transcriptional changes require methodologies that take all these elements into account. RESULTS: In this work, we develop the application of the Analysis of variance-simultaneous component analysis (ANOVA-SCA) Smilde et al. Bioinformatics, (2005) to the analysis of multiple series time course microarray data as an example of multifactorial gene expression profiling experiments. We denoted this implementation as ASCA-genes. We show how the combination of ANOVA-modeling and a dimension reduction technique is effective in extracting targeted signals from data by-passing structural noise. The methodology is valuable for identifying main and secondary responses associated with the experimental factors and spotting relevant experimental conditions. We additionally propose a novel approach for gene selection in the context of the relation of individual transcriptional patterns to global gene expression signals. We demonstrate the methodology on both real and synthetic datasets. AVAILABILITY: ASCA-genes has been implemented in the statistical language R and is available at http://www.ivia.es/centrodegenomica/bioinformatics.htm. 
SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, keywords = {Algorithms *Analysis of Variance Computational Biology/*methods Computer Simulation Data Interpretation, Genetic, Genetic Models, Statistical Gene Expression Profiling/*methods Models, Statistical Oligonucleotide Array Sequence Analysis/*methods Principal Component Analysis Time Factors Transcription}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17519250}, author = {Nueda, M. J. and A. Conesa and Westerhuis, J. A. and Hoefsloot, H. C. and Smilde, A. K. and Talon, M. and Ferrer, A.} }

% PubMed import, PMID 17584915. Same article as record 604 that follows
% (identical title, journal, volume, year, pages and abstract); the DOI and
% the accented full author names are taken from that record.
% MeSH keywords are separated by semicolons because several headings
% contain a comma themselves (e.g. "Models, Biological").
@article {17584915,
  title = {Evidence for systems-level molecular mechanisms of tumorigenesis},
  journal = {BMC Genomics},
  volume = {8},
  year = {2007},
  note = {Hernandez, Pilar Huerta-Cepas, Jaime Montaner, David Al-Shahrour, Fatima Valls, Joan Gomez, Laia Capella, Gabriel Dopazo, Joaquin Pujana, Miguel Angel Research Support, Non-U.S. Gov{\textquoteright}t England BMC genomics BMC Genomics. 2007 Jun 20;8:185.},
  pages = {185},
  abstract = {BACKGROUND: Cancer arises from the consecutive acquisition of genetic alterations. Increasing evidence suggests that as a consequence of these alterations, molecular interactions are reprogrammed in the context of highly connected and regulated cellular networks. Coordinated reprogramming would allow the cell to acquire the capabilities for malignant growth. RESULTS: Here, we determine the coordinated function of cancer gene products (i.e., proteins encoded by differentially expressed genes in tumors relative to healthy tissue counterparts, hereafter referred to as "CGPs") defined as their topological properties and organization in the interactome network. We show that CGPs are central to information exchange and propagation and that they are specifically organized to promote tumorigenesis. Centrality is identified by both local (degree) and global (betweenness and closeness) measures, and systematically appears in down-regulated CGPs. Up-regulated CGPs do not consistently exhibit centrality, but both types of cancer products determine the overall integrity of the network structure. In addition to centrality, down-regulated CGPs show topological association that correlates with common biological processes and pathways involved in tumorigenesis. CONCLUSION: Given the current limited coverage of the human interactome, this study proposes that tumorigenesis takes place in a specific and organized way at the molecular systems-level and suggests a model that comprises the precise down-regulation of groups of topologically-associated proteins involved in particular functions, orchestrated with the up-regulation of specific proteins.},
  keywords = {*Cell Transformation, Neoplastic; *Gene Expression Profiling; *Gene Expression Regulation, Neoplastic; Humans; Male; Models, Biological; Models, Genetic; Models, Statistical; Neoplasm Proteins/*physiology; Neoplasms/etiology/*genetics; Prostatic Neoplasms/genetics; Protein Interaction Mapping; RNA, Messenger/metabolism; Signal Transduction; Systems Biology},
  doi = {10.1186/1471-2164-8-185},
  url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17584915},
  author = {Hern{\'a}ndez, Pilar and Huerta-Cepas, Jaime and Montaner, David and Al-Shahrour, F{\'a}tima and Valls, Joan and G{\'o}mez, Laia and Capell{\`a}, Gabriel and Dopazo, Joaquin and Pujana, Miguel Angel}
}
@article {604, title = {Evidence for systems-level molecular mechanisms of tumorigenesis.}, journal = {BMC Genomics}, volume = {8}, year = {2007}, month = {2007 Jun 20}, pages = {185}, abstract = {

BACKGROUND: Cancer arises from the consecutive acquisition of genetic alterations. Increasing evidence suggests that as a consequence of these alterations, molecular interactions are reprogrammed in the context of highly connected and regulated cellular networks. Coordinated reprogramming would allow the cell to acquire the capabilities for malignant growth.

RESULTS: Here, we determine the coordinated function of cancer gene products (i.e., proteins encoded by differentially expressed genes in tumors relative to healthy tissue counterparts, hereafter referred to as "CGPs") defined as their topological properties and organization in the interactome network. We show that CGPs are central to information exchange and propagation and that they are specifically organized to promote tumorigenesis. Centrality is identified by both local (degree) and global (betweenness and closeness) measures, and systematically appears in down-regulated CGPs. Up-regulated CGPs do not consistently exhibit centrality, but both types of cancer products determine the overall integrity of the network structure. In addition to centrality, down-regulated CGPs show topological association that correlates with common biological processes and pathways involved in tumorigenesis.

CONCLUSION: Given the current limited coverage of the human interactome, this study proposes that tumorigenesis takes place in a specific and organized way at the molecular systems-level and suggests a model that comprises the precise down-regulation of groups of topologically-associated proteins involved in particular functions, orchestrated with the up-regulation of specific proteins.

}, keywords = {Cell Transformation, Neoplastic, Gene Expression Profiling, Gene Expression Regulation, Neoplastic, Humans, Male, Models, Biological, Models, Genetic, Models, Statistical, Neoplasm Proteins, Neoplasms, Prostatic Neoplasms, Protein Interaction Mapping, RNA, Messenger, Signal Transduction, Systems biology}, issn = {1471-2164}, doi = {10.1186/1471-2164-8-185}, author = {Hern{\'a}ndez, Pilar and Huerta-Cepas, Jaime and Montaner, David and Al-Shahrour, F{\'a}tima and Valls, Joan and G{\'o}mez, Laia and Capell{\`a}, Gabriel and Dopazo, Joaquin and Pujana, Miguel Angel} } @article {17983265, title = {From endosymbiont to host-controlled organelle: the hijacking of mitochondrial protein synthesis and metabolism}, journal = {PLoS Comput Biol}, volume = {3}, number = {11}, year = {2007}, note = {Gabaldon, Toni Huynen, Martijn A Research Support, Non-U.S. Gov{\textquoteright}t United States PLoS computational biology PLoS Comput Biol. 2007 Nov;3(11):e219. Epub 2007 Sep 26.}, pages = {e219}, abstract = {Mitochondria are eukaryotic organelles that originated from the endosymbiosis of an alpha-proteobacterium. To gain insight into the evolution of the mitochondrial proteome as it proceeded through the transition from a free-living cell to a specialized organelle, we compared a reconstructed ancestral proteome of the mitochondrion with the proteomes of alpha-proteobacteria as well as with the mitochondrial proteomes in yeast and man. Overall, there has been a large turnover of the mitochondrial proteome during the evolution of mitochondria. Early in the evolution of the mitochondrion, proteins involved in cell envelope synthesis have virtually disappeared, whereas proteins involved in replication, transcription, cell division, transport, regulation, and signal transduction have been replaced by eukaryotic proteins. 
More than half of what remains from the mitochondrial ancestor in modern mitochondria corresponds to translation, including post-translational modifications, and to metabolic pathways that are directly, or indirectly, involved in energy conversion. Altogether, the results indicate that the eukaryotic host has hijacked the proto-mitochondrion, taking control of its protein synthesis and metabolism.}, keywords = {Computer Simulation DNA Mutational Analysis/methods Evolution *Evolution, Genetic Organelles/physiology Protein Biosynthesis/*genetics Symbiosis/*genetics, Molecular Fungal Proteins/*physiology Genetic Variation/genetics Humans Mitochondria/*physiology Mitochondrial Proteins/*physiology *Models}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17983265}, author = {Gabald{\'o}n, T. and M. A. Huynen} } @article {17407596, title = {From genes to functional classes in the study of biological systems}, journal = {BMC Bioinformatics}, volume = {8}, year = {2007}, note = {

Al-Shahrour, Fatima Arbiza, Leonardo Dopazo, Hernan Huerta-Cepas, Jaime Minguez, Pablo Montaner, David Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England BMC bioinformatics BMC Bioinformatics. 2007 Apr 3;8:114.

}, pages = {114}, abstract = {

BACKGROUND: With the popularization of high-throughput techniques, the need for procedures that help in the biological interpretation of results has increased enormously. Recently, new procedures inspired in systems biology criteria have started to be developed. RESULTS: Here we present FatiScan, a web-based program which implements a threshold-independent test for the functional interpretation of large-scale experiments that does not depend on the pre-selection of genes based on the multiple application of independent tests to each gene. The test implemented aims to directly test the behaviour of blocks of functionally related genes, instead of focusing on single genes. In addition, the test does not depend on the type of the data used for obtaining significance values, and consequently different types of biologically informative terms (gene ontology, pathways, functional motifs, transcription factor binding sites or regulatory sites from CisRed) can be applied to different classes of genome-scale studies. We exemplify its application in microarray gene expression, evolution and interactomics. CONCLUSION: Methods for gene set enrichment which, in addition, are independent from the original data and experimental design constitute a promising alternative for the functional profiling of genome-scale experiments. A web server that performs the test described and other similar ones can be found at: http://www.babelomics.org.

}, keywords = {Algorithms Chromosome Mapping/*methods Computer Simulation Gene Expression Profiling/methods *Models, babelomics, Biological Multigene Family/*physiology Signal Transduction/*physiology *Software Systems Biology/*methods *User-Computer Interface}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17407596}, author = {Fatima Al-Shahrour and Arbiza, L. and H. Dopazo and Huerta-Cepas, J. and Minguez, P. and Montaner, D. and Dopazo, J.} } @article {606, title = {From genes to functional classes in the study of biological systems.}, journal = {BMC Bioinformatics}, volume = {8}, year = {2007}, month = {2007 Apr 03}, pages = {114}, abstract = {

BACKGROUND: With the popularization of high-throughput techniques, the need for procedures that help in the biological interpretation of results has increased enormously. Recently, new procedures inspired in systems biology criteria have started to be developed.

RESULTS: Here we present FatiScan, a web-based program which implements a threshold-independent test for the functional interpretation of large-scale experiments that does not depend on the pre-selection of genes based on the multiple application of independent tests to each gene. The test implemented aims to directly test the behaviour of blocks of functionally related genes, instead of focusing on single genes. In addition, the test does not depend on the type of the data used for obtaining significance values, and consequently different types of biologically informative terms (gene ontology, pathways, functional motifs, transcription factor binding sites or regulatory sites from CisRed) can be applied to different classes of genome-scale studies. We exemplify its application in microarray gene expression, evolution and interactomics.

CONCLUSION: Methods for gene set enrichment which, in addition, are independent from the original data and experimental design constitute a promising alternative for the functional profiling of genome-scale experiments. A web server that performs the test described and other similar ones can be found at: http://www.babelomics.org.

}, keywords = {Algorithms, Chromosome Mapping, Computer Simulation, Gene Expression Profiling, Models, Biological, Multigene Family, Signal Transduction, Software, Systems biology, User-Computer Interface}, issn = {1471-2105}, doi = {10.1186/1471-2105-8-114}, author = {Al-Shahrour, F{\'a}tima and Arbiza, Leonardo and Dopazo, Hern{\'a}n and Huerta-Cepas, Jaime and Minguez, Pablo and Montaner, David and Dopazo, Joaquin} } @article {17567924, title = {The human phylome}, journal = {Genome Biol}, volume = {8}, number = {6}, year = {2007}, note = {Huerta-Cepas, Jaime Dopazo, Hernan Dopazo, Joaquin Gabaldon, Toni Research Support, Non-U.S. Gov{\textquoteright}t England Genome biology Genome Biol. 2007;8(6):R109.}, pages = {R109}, abstract = {BACKGROUND: Phylogenomics analyses serve to establish evolutionary relationships among organisms and their genes. A phylome, the complete collection of all gene phylogenies in a genome, constitutes a valuable source of information, but its use in large genomes still constitutes a technical challenge. The use of phylomes also requires the development of new methods that help us to interpret them. RESULTS: We reconstruct here the human phylome, which includes the evolutionary relationships of all human proteins and their homologs among 39 fully sequenced eukaryotes. Phylogenetic techniques used include alignment trimming, branch length optimization, evolutionary model testing and maximum likelihood and Bayesian methods. Although differences with alternative topologies are minor, most of the trees support the Coelomata and Unikont hypotheses as well as the grouping of primates with laurasatheria to the exclusion of rodents. We assess the extent of gene duplication events and their relationship with the functional roles of the protein families involved. We find support for at least one, and probably two, rounds of whole genome duplications before vertebrate radiation. 
Using a novel algorithm that is independent from a species phylogeny, we derive orthology and paralogy relationships of human proteins among eukaryotic genomes. CONCLUSION: Topological variations among phylogenies for different genes are to be expected, highlighting the danger of gene-sampling effects in phylogenomic analyses. Several links can be established between the functions of gene families duplicated at certain phylogenetic splits and major evolutionary transitions in those lineages. The pipeline implemented here can be easily adapted for use in other organisms.}, keywords = {Animals *Evolution Evolution, DNA, Molecular Gene Duplication *Genome Humans *Phylogeny Proteins/genetics Sequence Analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17567924}, author = {Huerta-Cepas, J. and H. Dopazo and Dopazo, J. and Gabald{\'o}n, T.} } @article {17135190, title = {PeroxisomeDB: a database for the peroxisomal proteome, functional genomics and disease}, journal = {Nucleic Acids Res}, volume = {35}, number = {Database issue}, year = {2007}, note = {Schluter, Agatha Fourcade, Stephane Domenech-Estevez, Enric Gabaldon, Toni Huerta-Cepas, Jaime Berthommier, Guillaume Ripp, Raymond Wanders, Ronald J A Poch, Olivier Pujol, Aurora Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2007 Jan;35(Database issue):D815-22. Epub 2006 Nov 28.}, pages = {D815-22}, abstract = {Peroxisomes are essential organelles of eukaryotic origin, ubiquitously distributed in cells and organisms, playing key roles in lipid and antioxidant metabolism. Loss or malfunction of peroxisomes causes more than 20 fatal inherited conditions. 
We have created a peroxisomal database (http://www.peroxisomeDB.org) that includes the complete peroxisomal proteome of Homo sapiens and Saccharomyces cerevisiae, by gathering, updating and integrating the available genetic and functional information on peroxisomal genes. PeroxisomeDB is structured in interrelated sections {\textquoteright}Genes{\textquoteright}, {\textquoteright}Functions{\textquoteright}, {\textquoteright}Metabolic pathways{\textquoteright} and {\textquoteright}Diseases{\textquoteright}, that include hyperlinks to selected features of NCBI, ENSEMBL and UCSC databases. We have designed graphical depictions of the main peroxisomal metabolic routes and have included updated flow charts for diagnosis. Precomputed BLAST, PSI-BLAST, multiple sequence alignment (MUSCLE) and phylogenetic trees are provided to assist in direct multispecies comparison to study evolutionary conserved functions and pathways. Highlights of the PeroxisomeDB include new tools developed for facilitating (i) identification of novel peroxisomal proteins, by means of identifying proteins carrying peroxisome targeting signal (PTS) motifs, (ii) detection of peroxisomes in silico, particularly useful for screening the deluge of newly sequenced genomes. PeroxisomeDB should contribute to the systematic characterization of the peroxisomal proteome and facilitate system biology approaches on the organelle.}, keywords = {Animals *Databases, Protein Genomics Humans Internet Mice Peroxisomal Disorders/*genetics Peroxisomes/*metabolism Protein Sorting Signals Proteome/chemistry/*genetics/*physiology Rats Saccharomyces cerevisiae Proteins/genetics/physiology Software User-Computer Interface}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17135190}, author = {Schluter, A. and Fourcade, S. and Domenech-Estevez, E. and Gabald{\'o}n, T. and Huerta-Cepas, J. and Berthommier, G. and Ripp, R. and Wanders, R. J. and Poch, O. 
and Pujol, A.} } @article {17452346, title = {Phylemon: a suite of web tools for molecular evolution, phylogenetics and phylogenomics}, journal = {Nucleic Acids Res}, volume = {35}, number = {Web Server issue}, year = {2007}, note = {Tarraga, Joaquin Medina, Ignacio Arbiza, Leonardo Huerta-Cepas, Jaime Gabaldon, Toni Dopazo, Joaquin Dopazo, Hernan Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2007 Jul;35(Web Server issue):W38-42. Epub 2007 Apr 22.}, pages = {W38--W42}, abstract = {Phylemon is an online platform for phylogenetic and evolutionary analyses of molecular sequence data. It has been developed as a web server that integrates a suite of different tools selected among the most popular stand-alone programs in phylogenetic and evolutionary analysis. It has been conceived as a natural response to the increasing demand of data analysis of many experimental scientists wishing to add a molecular evolution and phylogenetics insight into their research. Tools included in Phylemon cover a wide yet selected range of programs: from the most basic for multiple sequence alignment to elaborate statistical methods of phylogenetic reconstruction including methods for evolutionary rates analyses and molecular adaptation. Phylemon has several features that differentiates it from other resources: (i) It offers an integrated environment that enables the direct concatenation of evolutionary analyses, the storage of results and handles required data format conversions, (ii) Once an outfile is produced, Phylemon suggests the next possible analyses, thus guiding the user and facilitating the integration of multi-step analyses, and (iii) users can define and save complete pipelines for specific phylogenetic analysis to be automatically used on many genes in subsequent sessions or multiple genes in a single session (phylogenomics). 
The Phylemon web server is available at http://phylemon.bioinfo.cipf.es.}, keywords = {Animals Computational Biology/*methods Databases, DNA Sequence Analysis, Genetic Evolution, Molecular Genetic Techniques Humans *Internet Models, Protein Software User-Computer Interface, Statistical *Phylogeny Programming Languages Sequence Alignment Sequence Analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=17452346}, author = {Tarraga, J. and Medina, Ignacio and Arbiza, L. and Huerta-Cepas, J. and Gabald{\'o}n, T. and Dopazo, J. and H. Dopazo} } @inbook {476, title = {Reconstruction of ancestral proteomes}, booktitle = {Ancestral Sequence Reconstruction}, year = {2007}, editor = {Liberles, D.}, publisher = {Oxford University Press}, organization = {Oxford University Press}, address = {Oxford}, isbn = {9780199299188}, url = {http://www.us.oup.com/us/catalog/general/subject/LifeSciences/EvolutionaryBiology/?view=usa\&ci=9780199299188}, author = {Gabald{\'o}n, T. and M. A. Huynen} } @article {16823138, title = {Blast2GO goes grid: developing a grid-enabled prototype for functional genomics analysis}, journal = {Stud Health Technol Inform}, volume = {120}, year = {2006}, note = {

Aparicio, G Gotz, S Conesa, A Segrelles, D Blanquer, I Garcia, J M Hernandez, V Robles, M Talon, M Netherlands Studies in health technology and informatics Stud Health Technol Inform. 2006;120:194-204.

}, pages = {194--204}, abstract = {

The vast amount in complexity of data generated in Genomic Research implies that new dedicated and powerful computational tools need to be developed to meet their analysis requirements. Blast2GO (B2G) is a bioinformatics tool for Gene Ontology-based DNA or protein sequence annotation and function-based data mining. The application has been developed with the aim of offering an easy-to-use tool for functional genomics research. Typical B2G users are middle size genomics labs carrying out sequencing, ETS and microarray projects, handling datasets up to several thousand sequences. In the current version of B2G. The power and analytical potential of both annotation and function data-mining is somehow restricted to the computational power behind each particular installation. In order to be able to offer the possibility of an enhanced computational capacity within this bioinformatics application, a Grid component is being developed. A prototype has been conceived for the particular problem of speeding up the Blast searches to obtain fast results for large datasets. Many efforts have been done in the literature concerning the speeding up of Blast searches, but few of them deal with the use of large heterogeneous production Grid Infrastructures. These are the infrastructures that could reach the largest number of resources and the best load balancing for data access. The Grid Service under development will analyse requests based on the number of sequences, splitting them accordingly to the available resources. Lower-level computation will be performed through MPIBLAST. The software architecture is based on the WSRF standard.

}, keywords = {babelomics}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=16823138}, author = {Aparicio, G. and Gotz, S. and A. Conesa and Segrelles, D. and Blanquer, I. and Garcia, J. M. and Hernandez, V. and Robles, M. and Talon, M.} } @article {16845056, title = {Next station in microarray data analysis: GEPAS}, journal = {Nucleic Acids Res}, volume = {34}, number = {Web Server issue}, year = {2006}, note = {

Montaner, David Tarraga, Joaquin Huerta-Cepas, Jaime Burguet, Jordi Vaquerizas, Juan M Conde, Lucia Minguez, Pablo Vera, Javier Mukherjee, Sach Valls, Joan Pujana, Miguel A G Alloza, Eva Herrero, Javier Al-Shahrour, Fatima Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2006 Jul 1;34(Web Server issue):W486-91.

}, pages = {W486--W491}, abstract = {

The Gene Expression Profile Analysis Suite (GEPAS) has been running for more than four years. During this time it has evolved to keep pace with the new interests and trends in the still changing world of microarray data analysis. GEPAS has been designed to provide an intuitive although powerful web-based interface that offers diverse analysis options from the early step of preprocessing (normalization of Affymetrix and two-colour microarray experiments and other preprocessing options), to the final step of the functional annotation of the experiment (using Gene Ontology, pathways, PubMed abstracts etc.), and include different possibilities for clustering, gene selection, class prediction and array-comparative genomic hybridization management. GEPAS is extensively used by researchers of many countries and its records indicate an average usage rate of 400 experiments per day. The web-based pipeline for microarray gene expression data, GEPAS, is available at http://www.gepas.org.

}, keywords = {gepas, microarray data analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=16845056}, author = {Montaner, D. and Tarraga, J. and Huerta-Cepas, J. and Burguet, J. and Vaquerizas, J. M. and L. Conde and Minguez, P. and Vera, J. and Mukherjee, S. and Valls, J. and Pujana, M. A. and Alloza, E. and Herrero, J. and Fatima Al-Shahrour and Dopazo, J.} } @article {16556314, title = {Origin and evolution of the peroxisomal proteome}, journal = {Biol Direct}, volume = {1}, year = {2006}, note = {Gabaldon, Toni Snel, Berend van Zimmeren, Frank Hemrika, Wieger Tabak, Henk Huynen, Martijn A England Biology direct Biol Direct. 2006 Mar 23;1:8.}, pages = {8}, abstract = {BACKGROUND: Peroxisomes are ubiquitous eukaryotic organelles involved in various oxidative reactions. Their enzymatic content varies between species, but the presence of common protein import and organelle biogenesis systems support a single evolutionary origin. The precise scenario for this origin remains however to be established. The ability of peroxisomes to divide and import proteins post-translationally, just like mitochondria and chloroplasts, supports an endosymbiotic origin. However, this view has been challenged by recent discoveries that mutant, peroxisome-less cells restore peroxisomes upon introduction of the wild-type gene, and that peroxisomes are formed from the Endoplasmic Reticulum. The lack of a peroxisomal genome precludes the use of classical analyses, as those performed with mitochondria or chloroplasts, to settle the debate. We therefore conducted large-scale phylogenetic analyses of the yeast and rat peroxisomal proteomes. RESULTS: Our results show that most peroxisomal proteins (39-58\%) are of eukaryotic origin, comprising all proteins involved in organelle biogenesis or maintenance. 
A significant fraction (13-18\%), consisting mainly of enzymes, has an alpha-proteobacterial origin and appears to be the result of the recruitment of proteins originally targeted to mitochondria. Consistent with the findings that peroxisomes are formed in the Endoplasmic Reticulum, we find that the most universally conserved Peroxisome biogenesis and maintenance proteins are homologous to proteins from the Endoplasmic Reticulum Assisted Decay pathway. CONCLUSION: Altogether our results indicate that the peroxisome does not have an endosymbiotic origin and that its proteins were recruited from pools existing within the primitive eukaryote. Moreover the reconstruction of primitive peroxisomal proteomes suggests that ontogenetically as well as phylogenetically, peroxisomes stem from the Endoplasmic Reticulum. REVIEWERS: This article was reviewed by Arcady Mushegian, Gaspar Jekely and John Logsdon. OPEN PEER REVIEW: Reviewed by Arcady Mushegian, Gaspar Jekely and John Logsdon. For the full reviews, please go to the Reviewers{\textquoteright} comments section.}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=16556314}, author = {Gabald{\'o}n, T. and B. Snel and van Zimmeren, F. and Hemrika, W. and Tabak, H. and M. A. Huynen} } @inbook {481, title = {Reliable and specific protein function prediction by combining homology with genomic(s) context}, booktitle = {Discovery of biomolecular mechanisms with theoretical data analyses}, year = {2006}, editor = {Eisenhaber, F.}, publisher = {Landes Bioscience}, organization = {Landes Bioscience}, isbn = {0-387-34527-2}, url = {http://www.landesbioscience.com/iu/output.php?id=479}, author = {M. A. Huynen and B. 
Snel and Gabald{\'o}n, T.} } @article {15744302, title = {An anaerobic mitochondrion that produces hydrogen}, journal = {Nature}, volume = {434}, number = {7029}, year = {2005}, note = {Boxma, Brigitte de Graaf, Rob M van der Staay, Georg W M van Alen, Theo A Ricard, Guenola Gabaldon, Toni van Hoek, Angela H A M Moon-van der Staay, Seung Yeo Koopman, Werner J H van Hellemond, Jaap J Tielens, Aloysius G M Friedrich, Thorsten Veenhuis, Marten Huynen, Martijn A Hackstein, Johannes H P Research Support, Non-U.S. Gov{\textquoteright}t England Nature Nature. 2005 Mar 3;434(7029):74-9.}, pages = {74--79}, abstract = {Hydrogenosomes are organelles that produce ATP and hydrogen, and are found in various unrelated eukaryotes, such as anaerobic flagellates, chytridiomycete fungi and ciliates. Although all of these organelles generate hydrogen, the hydrogenosomes from these organisms are structurally and metabolically quite different, just like mitochondria where large differences also exist. These differences have led to a continuing debate about the evolutionary origin of hydrogenosomes. Here we show that the hydrogenosomes of the anaerobic ciliate Nyctotherus ovalis, which thrives in the hindgut of cockroaches, have retained a rudimentary genome encoding components of a mitochondrial electron transport chain. Phylogenetic analyses reveal that those proteins cluster with their homologues from aerobic ciliates. In addition, several nucleus-encoded components of the mitochondrial proteome, such as pyruvate dehydrogenase and complex II, were identified. The N. ovalis hydrogenosome is sensitive to inhibitors of mitochondrial complex I and produces succinate as a major metabolic end product{\textendash}biochemical traits typical of anaerobic mitochondria. The production of hydrogen, together with the presence of a genome encoding respiratory chain components, and biochemical features characteristic of anaerobic mitochondria, identify the N. 
ovalis organelle as a missing link between mitochondria and hydrogenosomes.}, keywords = {*Anaerobiosis Animals Ciliophora/*cytology/genetics/*metabolism/ultrastructure Cockroaches/parasitology DNA, Mitochondrial/genetics Electron Transport Electron Transport Complex I/antagonists \& inhibitors/metabolism Genome Glucose/metabolism Hydrogen/*metabolism Mitochondria/enzymology/genetics/*metabolism/ultrastructure Molecular Sequence Data Open Reading Fra}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15744302}, author = {Boxma, B. and de Graaf, R. M. and van der Staay, G. W. and van Alen, T. A. and Ricard, G. and Gabald{\'o}n, T. and van Hoek, A. H. and Moon-van der Staay, S. Y. and Koopman, W. J. and van Hellemond, J. J. and Tielens, A. G. and Friedrich, T. and Veenhuis, M. and M. A. Huynen and Hackstein, J. H.} } @article {15670813, title = {Combining data from genomes, Y2H and 3D structure indicates that BolA is a reductase interacting with a glutaredoxin}, journal = {FEBS Lett}, volume = {579}, number = {3}, year = {2005}, note = {Huynen, Martijn A Spronk, Chris A E M Gabaldon, Toni Snel, Berend Research Support, Non-U.S. Gov{\textquoteright}t Netherlands FEBS letters FEBS Lett. 2005 Jan 31;579(3):591-6.}, pages = {591--596}, abstract = {Genomes, functional genomics data and 3D structure reflect different aspects of protein function. Here, we combine these data to predict that BolA, a widely distributed protein family with unknown function, is a reductase that interacts with a glutaredoxin. Comparisons at the 3D structure level as well as at the sequence profile level indicate homology between BolA and OsmC, an enzyme that reduces organic peroxides. Complementary to this, comparative analyses of genomes and genomics data provide strong evidence of an interaction between BolA and the mono-thiol glutaredoxin family. 
The interaction between BolA and a mono-thiol glutaredoxin is of particular interest because BolA does not, in contrast to its homolog OsmC, have evolutionarily conserved cysteines to provide it with reducing equivalents. We propose that BolA uses the mono-thiol glutaredoxin as the source for these.}, keywords = {*Genome Glutaredoxins Models, Molecular Oxidoreductases/chemistry/*metabolism Phylogeny Protein Conformation}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15670813}, author = {M. A. Huynen and Spronk, C. A. and Gabald{\'o}n, T. and B. Snel} } @article {16170324, title = {The C-type lectin fold as an evolutionary solution for massive sequence variation}, journal = {Nat Struct Mol Biol}, volume = {12}, number = {10}, year = {2005}, note = {McMahon, Stephen A Miller, Jason L Lawton, Jeffrey A Kerkow, Donald E Hodes, Asher Marti-Renom, Marc A Doulatov, Sergei Narayanan, Eswar Sali, Andrej Miller, Jeff F Ghosh, Partho F31AI061840/AI/NIAID NIH HHS/United States F32AI49695/AI/NIAID NIH HHS/United States T32GM008326/GM/NIGMS NIH HHS/United States Research Support, N.I.H., Extramural Research Support, Non-U.S. Gov{\textquoteright}t Research Support, U.S. Gov{\textquoteright}t, P.H.S. United States Nature structural \& molecular biology Nat Struct Mol Biol. 2005 Oct;12(10):886-92. Epub 2005 Sep 18.}, pages = {886--892}, abstract = {Only few instances are known of protein folds that tolerate massive sequence variation for the sake of binding diversity. The most extensively characterized is the immunoglobulin fold. We now add to this the C-type lectin (CLec) fold, as found in the major tropism determinant (Mtd), a retroelement-encoded receptor-binding protein of Bordetella bacteriophage. Variation in Mtd, with its approximately $10^{13}$ possible sequences, enables phage adaptation to Bordetella spp. 
Mtd is an intertwined, pyramid-shaped trimer, with variable residues organized by its CLec fold into discrete receptor-binding sites. The CLec fold provides a highly static scaffold for combinatorial display of variable residues, probably reflecting a different evolutionary solution for balancing diversity against stability from that in the immunoglobulin fold. Mtd variants are biased toward the receptor pertactin, and there is evidence that the CLec fold is used broadly for sequence variation by related retroelements.}, keywords = {Amino Acid Sequence Bacterial Outer Membrane Proteins/*chemistry Bacteriophages/*metabolism Bordetella/*virology Evolution, Bordetella/*chemistry, C-Type/*chemistry Molecular Sequence Data Protein Conformation Protein Folding Viral Proteins/*chemistry/*genetics Virulence Factors, Molecular Genetic Variation Genome, Viral Lectins}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=16170324}, author = {McMahon, S. A. and Miller, J. L. and Lawton, J. A. and Kerkow, D. E. and Hodes, A. and M. A. Marti-Renom and Doulatov, S. and Narayanan, E. and Sali, A. and Miller, J. F. 
and Ghosh, P.} } @article {15830128, title = {Development of a citrus genome-wide EST collection and cDNA microarray as resources for genomic studies}, journal = {Plant Mol Biol}, volume = {57}, number = {3}, year = {2005}, note = {Forment, J Gadea, J Huerta, L Abizanda, L Agusti, J Alamar, S Alos, E Andres, F Arribas, R Beltran, J P Berbel, A Blazquez, M A Brumos, J Canas, L A Cercos, M Colmenero-Flores, J M Conesa, A Estables, B Gandia, M Garcia-Martinez, J L Gimeno, J Gisbert, A Gomez, G Gonzalez-Candelas, L Granell, A Guerri, J Lafuente, M T Madueno, F Marcos, J F Marques, M C Martinez, F Martinez-Godoy, M A Miralles, S Moreno, P Navarro, L Pallas, V Perez-Amador, M A Perez-Valle, J Pons, C Rodrigo, I Rodriguez, P L Royo, C Serrano, R Soler, G Tadeo, F Talon, M Terol, J Trenor, M Vaello, L Vicente, O Vidal, Ch Zacarias, L Conejero, V Comparative Study Research Support, U.S. Gov{\textquoteright}t, Non-P.H.S. Netherlands Plant molecular biology Plant Mol Biol. 2005 Feb;57(3):375-91.}, pages = {375--391}, abstract = {A functional genomics project has been initiated to approach the molecular characterization of the main biological and agronomical traits of citrus. As a key part of this project, a citrus EST collection has been generated from 25 cDNA libraries covering different tissues, developmental stages and stress conditions. The collection includes a total of 22,635 high-quality ESTs, grouped in 11,836 putative unigenes, which represent at least one third of the estimated number of genes in the citrus genome. Functional annotation of unigenes which have Arabidopsis orthologues (68\% of all unigenes) revealed gene representation in every major functional category, suggesting that a genome-wide EST collection was obtained. A Citrus clementina Hort. ex Tan. cv. Clemenules genomic library, that will contribute to further characterization of relevant genes, has also been constructed. 
To initiate the analysis of citrus transcriptome, we have developed a cDNA microarray containing 12,672 probes corresponding to 6875 putative unigenes of the collection. Technical characterization of the microarray showed high intra- and inter-array reproducibility, as well as a good range of sensitivity. We have also validated gene expression data achieved with this microarray through an independent technique such as RNA gel blot analysis.}, keywords = {Citrus/*genetics DNA, Complementary/chemistry/genetics *Expressed Sequence Tags Gene Expression Profiling Gene Library *Genome, DNA, Plant Genomics/*methods Molecular Sequence Data Oligonucleotide Array Sequence Analysis/*methods RNA, Plant/genetics/metabolism Reproducibility of Results Sequence Analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15830128}, author = {J. Forment and J. Gadea and Huerta, L. and Abizanda, L. and Agusti, J. and Alamar, S. and Alos, E. and Andres, F. and Arribas, R. and Beltran, J. P. and Berbel, A. and Blazquez, M. A. and Brumos, J. and Canas, L. A. and Cercos, M. and Colmenero-Flores, J. M. and A. Conesa and Estables, B. and Gandia, M. and Garcia-Martinez, J. L. and Gimeno, J. and Gisbert, A. and Gomez, G. and Gonzalez-Candelas, L. and Granell, A. and Guerri, J. and Lafuente, M. T. and Madueno, F. and Marcos, J. F. and Marques, M. C. and Martinez, F. and Martinez-Godoy, M. A. and Miralles, S. and Moreno, P. and Navarro, L. and Pallas, V. and Perez-Amador, M. A. and Perez-Valle, J. and Pons, C. and Rodrigo, I. and Rodriguez, P. L. and Royo, C. and Serrano, R. and Soler, G. and Tadeo, F. and Talon, M. and Terol, J. and Trenor, M. and Vaello, L. and Vicente, O. and Vidal, Ch and Zacarias, L. and Conejero, V.} } @article {15980548, title = {GEPAS, an experiment-oriented pipeline for the analysis of microarray gene expression data}, journal = {Nucleic Acids Res}, volume = {33}, number = {Web Server issue}, year = {2005}, note = {

Vaquerizas, Juan M Conde, Lucia Yankilevich, Patricio Cabezon, Amaya Minguez, Pablo Diaz-Uriarte, Ramon Al-Shahrour, Fatima Herrero, Javier Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2005 Jul 1;33(Web Server issue):W616-20.

}, pages = {W616--W620}, abstract = {

The Gene Expression Profile Analysis Suite, GEPAS, has been running for more than three years. With $>$76,000 experiments analysed during the last year and a daily average of almost 300 analyses, GEPAS can be considered a well-established and widely used platform for gene expression microarray data analysis. GEPAS is oriented to the analysis of whole series of experiments. Its design and development have been driven by the demands of the biomedical community, probably the most active collective in the field of microarray users. Although clustering methods have obviously been implemented in GEPAS, our interest has focused more on methods for finding genes differentially expressed among distinct classes of experiments or correlated to diverse clinical outcomes, as well as on building predictors. There is also a great interest in CGH-arrays which fostered the development of the corresponding tool in GEPAS: InSilicoCGH. Much effort has been invested in GEPAS for developing and implementing efficient methods for functional annotation of experiments in the proper statistical framework. Thus, the popular FatiGO has expanded to a suite of programs for functional annotation of experiments, including information on transcription factor binding sites, chromosomal location and tissues. The web-based pipeline for microarray gene expression data, GEPAS, is available at http://www.gepas.org.

}, keywords = {gepas, microarray data analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15980548}, author = {Vaquerizas, J. M. and L. Conde and Yankilevich, P. and Cabezon, A. and Minguez, P. and Diaz-Uriarte, R. and Fatima Al-Shahrour and Herrero, J. and Dopazo, J.} } @article {15608250, title = {HCAD, closing the gap between breakpoints and genes}, journal = {Nucleic Acids Res}, volume = {33}, number = {Database issue}, year = {2005}, note = {Hoffmann, Robert Dopazo, Joaquin Cigudosa, Juan C Valencia, Alfonso Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2005 Jan 1;33(Database issue):D511-3.}, pages = {D511--D513}, abstract = {Recurrent chromosome aberrations are an important resource when associating human pathologies to specific genes. However, for technical reasons a large number of chromosome breakpoints are defined only at the level of cytobands and many of the genes involved remain unidentified. We developed a web-based information system that mines the scientific literature and generates textual and comprehensive information on all human breakpoints. We show that the statistical analysis of this textual information and its combination with genomic data can identify genes directly involved in DNA rearrangements. The Human Chromosome Aberration Database (HCAD) is publicly accessible at http://www.pdg.cnb.uam.es/UniPub/HCAD/.}, keywords = {*Chromosome Breakage Chromosome Disorders/diagnosis/*genetics *Databases, Genetic Genes *Genetic Predisposition to Disease Humans PubMed Systems Integration}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15608250}, author = {Hoffmann, R. and Dopazo, J. and Cigudosa, J. C. 
and Valencia, A.} } @article {16204094, title = {Lineage-specific gene loss following mitochondrial endosymbiosis and its potential for function prediction in eukaryotes}, journal = {Bioinformatics}, volume = {21}, number = {Suppl 2}, year = {2005}, note = {Gabaldon, Toni Huynen, Martijn A Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2005 Sep 1;21 Suppl 2:ii144-50.}, pages = {ii144--ii150}, abstract = {MOTIVATION: The endosymbiotic origin of mitochondria has resulted in a massive horizontal transfer of genetic material from an alpha-proteobacterium to the early eukaryotes. Using large-scale phylogenetic analysis we have previously identified 630 orthologous groups of proteins derived from this event. Here we show that this proto-mitochondrial protein set has undergone extensive lineage-specific gene loss in the eukaryotes, with an average of three losses per orthologous group in a phylogeny of nine species. This gene loss has resulted in a high variability of the alphaproteobacterial-derived gene content of present-day eukaryotic genomes that might reflect functional adaptation to different environments. Proteins functioning in the same biochemical pathway tend to have a similar history of gene loss events, and we use this property to predict functional interactions among proteins in our set.}, keywords = {Animals Chromosome Mapping/*methods DNA, Mitochondrial/*genetics *Evolution, Molecular *Gene Deletion Genetic Variation/genetics Humans Linkage Disequilibrium/*genetics Mitochondrial Proteins/*genetics Sequence Homology, Nucleic Acid Species Specificity Symbiosis/*genetics}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=16204094}, author = {Gabald{\'o}n, T. and M. A. 
Huynen} } @article {15609347, title = {A novel candidate region linked to development of both pheochromocytoma and head/neck paraganglioma}, journal = {Genes Chromosomes Cancer}, volume = {42}, number = {3}, year = {2005}, note = {Cascon, Alberto Ruiz-Llorente, Sergio Rodriguez-Perales, Sandra Honrado, Emiliano Martinez-Ramirez, Angel Leton, Rocio Montero-Conde, Cristina Benitez, Javier Dopazo, Joaquin Cigudosa, Juan C Robledo, Mercedes Research Support, Non-U.S. Gov{\textquoteright}t United States Genes, chromosomes \& cancer Genes Chromosomes Cancer. 2005 Mar;42(3):260-8.}, pages = {260--268}, abstract = {Although the histologic distinction between pheochromocytomas and head and neck paragangliomas is clear, little is known about the genetic differences between them. To date, various sets of genes have been found to be involved in inherited susceptibility to developing both tumor types, but the genes involved in sporadic pathogenesis are still unknown. To define new candidate regions, we performed CGH analysis on 29 pheochromocytomas and on 24 paragangliomas mainly of head and neck origin (20 of 24), which allowed us to differentiate between the two tumor types. Loss of 3q was significantly more frequent in pheochromocytomas, and loss of 1q appeared only in paragangliomas. We also found gain of 11q13 to be a significantly frequent alteration in malignant cases of both types. In addition, recurrent loss of 8p22-23 was found in 62\% of pheochromocytomas (including all malignant cases) versus in 33\% of paragangliomas, suggesting that this region contains candidate genes involved in the pathogenesis of this abnormality. Using FISH analysis on tissue microarrays, we confirmed genomic deletion of this region in 55\% of pheochromocytomas compared to 12\% of paragangliomas. 
Loss of 8p22-23 appears to be an important event in the sporadic development of these tumors, and additional molecular studies are necessary to identify candidate genes in this chromosomal region.}, keywords = {80 and over Child Chromosomes, Adolescent Adrenal Gland Neoplasms/*genetics Adult Aged Aged, Biological/*genetics, Human, Pair 1/genetics Chromosomes, Pair 11/genetics Chromosomes, Pair 3/genetics Chromosomes, Pair 8/genetics Female Gene Deletion Head and Neck Neoplasms/*genetics Humans Male Middle Aged Nucleic Acid Hybridization Paraganglioma/*genetics Pheochromocytoma/*genetics Tumor Markers}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15609347}, author = {Cascon, A. and Ruiz-Llorente, S. and Rodriguez-Perales, S. and Honrado, E. and Martinez-Ramirez, A. and Leton, R. and Montero-Conde, C. and Benitez, J. and Dopazo, J. and Cigudosa, J. C. and M. Robledo} } @article {15770521, title = {Phenotypic characterization of BRCA1 and BRCA2 tumors based in a tissue microarray study with 37 immunohistochemical markers}, journal = {Breast Cancer Res Treat}, volume = {90}, number = {1}, year = {2005}, note = {Palacios, Jose Honrado, Emiliano Osorio, Ana Cazorla, Alicia Sarrio, David Barroso, Alicia Rodriguez, Sandra Cigudosa, Juan C Diez, Orland Alonso, Carmen Lerma, Enrique Dopazo, Joaquin Rivas, Carmen Benitez, Javier Research Support, Non-U.S. Gov{\textquoteright}t Netherlands Breast cancer research and treatment Breast Cancer Res Treat. 2005 Mar;90(1):5-14.}, pages = {5--14}, abstract = {Familial breast cancers that are associated with BRCA1 or BRCA2 germline mutations differ in both their morphological and immunohistochemical characteristics. To further characterize the molecular difference between genotypes, the authors evaluated the expression of 37 immunohistochemical markers in a tissue microarray (TMA) containing cores from 20 BRCA1, 14 BRCA2, and 59 sporadic age-matched breast carcinomas. 
Markers analyzed included, among others, common markers in breast cancer, such as hormone receptors, p53 and HER2, along with 15 molecules involved in cell cycle regulation, such as cyclins, cyclin dependent kinases (CDK) and CDK inhibitors (CDKI), apoptosis markers, such as BCL2 and active caspase 3, and two basal/myoepithelial markers (CK 5/6 and P-cadherin). In addition, we analyzed the amplification of CCND1, CCNE, HER2 and MYC by FISH. Unsupervised cluster data analysis of both hereditary and sporadic cases using the complete set of immunohistochemical markers demonstrated that most BRCA1-associated carcinomas grouped in a branch of ER-, HER2-negative tumors that expressed basal cell markers and/or p53 and had higher expression of activated caspase 3. The cell cycle proteins associated with these tumors were E2F6, cyclins A, B1 and E, SKP2 and Topo IIalpha. In contrast, most BRCA2-associated carcinomas grouped in a branch composed by ER/PR/BCL2-positive tumors with a higher expression of the cell cycle proteins cyclin D1, cyclin D3, p27, p16, p21, CDK4, CDK2 and CDK1. In conclusion, our study in hereditary breast cancer tumors analyzing 37 immunohistochemical markers, define the molecular differences between BRCA1 and BRCA2 tumors with respect to hormonal receptors, cell cycle, apoptosis and basal cell markers.}, keywords = {Adult Apoptosis Breast Neoplasms/*genetics/*pathology Cell Cycle Proteins Cluster Analysis Female *Genes, Biological/genetics/metabolism, BRCA1 *Genes, BRCA2 Humans Immunohistochemistry In Situ Hybridization, Fluorescence Phenotype Spain *Tissue Array Analysis *Tumor Markers}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15770521}, author = {Palacios, J. and Honrado, E. and Osorio, A. and Cazorla, A. and Sarrio, D. and Barroso, A. and Rodriguez, S. and Cigudosa, J. C. and Diez, O. and Alonso, C. and Lerma, E. and Dopazo, J. and Rivas, C. 
and Benitez, J.} } @article {15709182, title = {A predictor based on the somatic genomic changes of the BRCA1/BRCA2 breast cancer tumors identifies the non-BRCA1/BRCA2 tumors with BRCA1 promoter hypermethylation}, journal = {Clin Cancer Res}, volume = {11}, number = {3}, year = {2005}, note = {Alvarez, Sara Diaz-Uriarte, Ramon Osorio, Ana Barroso, Alicia Melchor, Lorenzo Paz, Maria Fe Honrado, Emiliano Rodriguez, Raquel Urioste, Miguel Valle, Laura Diez, Orland Cigudosa, Juan Cruz Dopazo, Joaquin Esteller, Manel Benitez, Javier Comparative Study Research Support, Non-U.S. Gov{\textquoteright}t United States Clinical cancer research : an official journal of the American Association for Cancer Research Clin Cancer Res. 2005 Feb 1;11(3):1146-53.}, pages = {1146-53}, abstract = {The genetic changes underlying in the development and progression of familial breast cancer are poorly understood. To identify a somatic genetic signature of tumor progression for each familial group, BRCA1, BRCA2, and non-BRCA1/BRCA2 (BRCAX) tumors, by high-resolution comparative genomic hybridization, we have analyzed 77 tumors previously characterized for BRCA1 and BRCA2 germ line mutations. Based on a combination of the somatic genetic changes observed at the six most different chromosomal regions and the status of the estrogen receptor, we developed using random forests a molecular classifier, which assigns to a given tumor a probability to belong either to the BRCA1 or to the BRCA2 class. Because 76.5\% (26 of 34) of the BRCAX cases were classified with our predictor to the BRCA1 class with a probability of >50\%, we analyzed the BRCA1 promoter region for aberrant methylation in all the BRCAX cases. We found that 15 of the 34 BRCAX analyzed tumors had hypermethylation of the BRCA1 gene. When we considered the predictor, we observed that all the cases with this epigenetic event were assigned to the BRCA1 class with a probability of >50\%. 
Interestingly, 84.6\% of the cases (11 of 13) assigned to the BRCA1 class with a probability >80\% had an aberrant methylation of the BRCA1 promoter. This fact suggests that somatic BRCA1 inactivation could modify the profile of tumor progression in most of the BRCAX cases.}, keywords = {BRCA1 Protein/*genetics BRCA2 Protein/*genetics Breast Neoplasms/*genetics/pathology Chromosomes, Genetic/*genetics, Human, Human Humans Male Mutation Nucleic Acid Hybridization/methods Promoter Regions, Pair 12/genetics Chromosomes, Pair 15/genetics Chromosomes, Pair 18/genetics Chromosomes, Pair 2/genetics Chromosomes, Pair 8/genetics *DNA Methylation Female Genome}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15709182}, author = {Alvarez, S. and Diaz-Uriarte, R. and Osorio, A. and Barroso, A. and Melchor, L. and Paz, M. F. and Honrado, E. and Rodriguez, R. and Urioste, M. and Valle, L. and Diez, O. and Cigudosa, J. C. and Dopazo, J. and Esteller, M. and Benitez, J.} } @inbook {486, title = {{Salinibacter} ruber: genomics and biogeography}, booktitle = {Adaptation to life in high salt concentrations in Archaea, Bacteria and Eukarya}, volume = {9}, year = {2005}, pages = {257--266}, publisher = {Kluwer Academic Publishers}, organization = {Kluwer Academic Publishers}, address = {Dordrecht, Netherlands}, author = {Ant{\'o}n, J and Pe{\~n}a, A and Valens, M and Santos, F and Gl{\"o}ckner, F. O. and Bauer, M and Dopazo, J. and Herrero, J. and Rossell{\'o}-Mora, R and Amann, R}, editor = {Gunde-Cimerman, Nina and Plemenitas, Ana and Oren, Aharon} } @article {15843018, title = {Tracing the evolution of a large protein complex in the eukaryotes, NADH:ubiquinone oxidoreductase (Complex I)}, journal = {J Mol Biol}, volume = {348}, number = {4}, year = {2005}, note = {Gabaldon, Toni Rainey, Daphne Huynen, Martijn A Research Support, Non-U.S. 
Gov{\textquoteright}t England Journal of molecular biology J Mol Biol. 2005 May 13;348(4):857-70.}, pages = {857-70}, abstract = {The increasing availability of sequenced genomes enables the reconstruction of the evolutionary history of large protein complexes. Here, we trace the evolution of NADH:ubiquinone oxidoreductase (Complex I), which has increased in size, by so-called supernumary subunits, from 14 subunits in the bacteria to 30 in the plants and algae, 37 in the fungi and 46 in the mammals. Using a combination of pair-wise and profile-based sequence comparisons at the levels of proteins and the DNA of the sequenced eukaryotic genomes, combined with phylogenetic analyses to establish orthology relationships, we were able to (1) trace the origin of six of the supernumerary subunits to the alpha-proteobacterial ancestor of the mitochondria, (2) detect previously unidentified homology relations between subunits from fungi and mammals, (3) detect previously unidentified subunits in the genomes of several species and (4) document several cases of gene duplications among supernumerary subunits in the eukaryotes. One of these, a duplication of N7BM (B17.2), is particularly interesting as it has been lost from genomes that have also lost Complex I proteins, making it a candidate for a Complex I interacting protein. A parsimonious reconstruction of eukaryotic Complex I evolution shows an initial increase in size that predates the separation of plants, fungi and metazoa, followed by a gradual adding and incidental losses of subunits in the various evolutionary lineages. 
This evolutionary scenario is in contrast to that for Complex I in the prokaryotes, for which the combination of several separate, and previously independently functioning modules into a single complex has been proposed.}, keywords = {Amino Acid Sequence Animals Computational Biology Electron Transport Complex I/*chemistry/*genetics/metabolism Eukaryotic Cells/*enzymology *Evolution, Molecular Humans Molecular Sequence Data Photosynthesis Phylogeny Plastids/enzymology Protein Binding Protein Subunits/chemistry/genetics/metabolism Sequence Alignment Structural Homology, Protein}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15843018}, author = {Gabald{\'o}n, T. and Rainey, D. and M. A. Huynen} } @article {15763561, title = {Variation and evolution of biomolecular systems: searching for functional relevance}, journal = {FEBS Lett}, volume = {579}, number = {8}, year = {2005}, note = {Huynen, Martijn A Gabaldon, Toni Snel, Berend Review Netherlands FEBS letters FEBS Lett. 2005 Mar 21;579(8):1839-45.}, pages = {1839-45}, abstract = {The availability of genome sequences and functional genomics data from multiple species enables us to compare the composition of biomolecular systems like biochemical pathways and protein complexes between species. Here, we review small- and large-scale, "genomics-based" approaches to biomolecular systems variation. In general, caution is required when comparing the results of bioinformatics analyses of genomes or of functional genomics data between species. Limitations to the sensitivity of sequence analysis tools and the noisy nature of genomics data tend to lead to systematic overestimates of the amount of variation. Nevertheless, the results from detailed manual analyses, and of large-scale analyses that filter out systematic biases, point to a large amount of variation in the composition of biomolecular systems. 
Such observations challenge our understanding of the function of the systems and their individual components and can potentially facilitate the identification and functional characterization of sub-systems within a system. Mapping the inter-species variation of complex biomolecular systems on a phylogenetic species tree allows one to reconstruct their evolution.}, keywords = {*Evolution, Molecular Genetic Variation Multiprotein Complexes/*genetics Phylogeny Protein Binding/genetics}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15763561}, author = {M. A. Huynen and Gabald{\'o}n, T. and B. Snel} } @article {14681398, title = {MODBASE, a database of annotated comparative protein structure models, and associated resources}, journal = {Nucleic Acids Res}, volume = {32}, number = {Database issue}, year = {2004}, note = {Pieper, Ursula Eswar, Narayanan Braberg, Hannes Madhusudhan, M S Davis, Fred P Stuart, Ashley C Mirkovic, Nebojsa Rossi, Andrea Marti-Renom, Marc A Fiser, Andras Webb, Ben Greenblatt, Daniel Huang, Conrad C Ferrin, Thomas E Sali, Andrej P41 RR01081/RR/NCRR NIH HHS/United States P50 GM62529/GM/NIGMS NIH HHS/United States R01 GM 54762/GM/NIGMS NIH HHS/United States R33 CA84699/CA/NCI NIH HHS/United States Research Support, Non-U.S. Gov{\textquoteright}t Research Support, U.S. Gov{\textquoteright}t, P.H.S. England Nucleic acids research Nucleic Acids Res. 2004 Jan 1;32(Database issue):D217-22.}, pages = {D217-22}, abstract = {MODBASE (http://salilab.org/modbase) is a relational database of annotated comparative protein structure models for all available protein sequences matched to at least one known protein structure. The models are calculated by MODPIPE, an automated modeling pipeline that relies on the MODELLER package for fold assignment, sequence-structure alignment, model building and model assessment (http://salilab.org/modeller). 
MODBASE uses the MySQL relational database management system for flexible querying and CHIMERA for viewing the sequences and structures (http://www.cgl.ucsf.edu/chimera/). MODBASE is updated regularly to reflect the growth in protein sequence and structure databases, as well as improvements in the software for calculating the models. For ease of access, MODBASE is organized into different data sets. The largest data set contains 1,26,629 models for domains in 659,495 out of 1,182,126 unique protein sequences in the complete Swiss-Prot/TrEMBL database (August 25, 2003); only models based on alignments with significant similarity scores and models assessed to have the correct fold despite insignificant alignments are included. Another model data set supports target selection and structure-based annotation by the New York Structural Genomics Research Consortium; e.g. the 53 new structures produced by the consortium allowed us to characterize structurally 24,113 sequences. MODBASE also contains binding site predictions for small ligands and a set of predicted interactions between pairs of modeled sequences from the same genome. Our other resources associated with MODBASE include a comprehensive database of multiple protein structure alignments (DBALI, http://salilab.org/dbali) as well as web servers for automated comparative modeling with MODPIPE (MODWEB, http://salilab.org/modweb), modeling of loops in protein structures (MODLOOP, http://salilab.org/modloop) and predicting functional consequences of single nucleotide polymorphisms (SNPWEB,
http://salilab.org/snpweb).}, keywords = {Amino Acid Sequence Animals Binding Sites *Computational Biology *Databases, Molecular Molecular Sequence Data Polymorphism, Protein Genomics Humans Internet Ligands Models, Single Nucleotide Protein Binding Protein Conformation Proteins/*chemistry/genetics Sequence Alignment Software User-Computer Interface}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=14681398}, author = {Pieper, U. and Eswar, N. and Braberg, H. and Madhusudhan, M. S. and Davis, F. P. and Stuart, A. C. and Mirkovic, N. and Rossi, A. and M. A. Marti-Renom and Fiser, A. and Webb, B. and Greenblatt, D. and Huang, C. C. and Ferrin, T. E. and Sali, A.} } @article {15215434, title = {New challenges in gene expression data analysis and the extended GEPAS}, journal = {Nucleic Acids Res}, volume = {32}, number = {Web Server issue}, year = {2004}, note = {

Herrero, Javier Vaquerizas, Juan M Al-Shahrour, Fatima Conde, Lucia Mateos, Alvaro Santoyo, Javier Diaz-Uriarte, Ramon Dopazo, Joaquin England Nucleic acids research Nucleic Acids Res. 2004 Jul 1;32(Web Server issue):W485-91.

}, pages = {W485-91}, abstract = {

Since the first papers published in the late nineties, including, for the first time, a comprehensive analysis of microarray data, the number of questions that have been addressed through this technique have both increased and diversified. Initially, interest focussed on genes coexpressing across sets of experimental conditions, implying, essentially, the use of clustering techniques. Recently, however, interest has focussed more on finding genes differentially expressed among distinct classes of experiments, or correlated to diverse clinical outcomes, as well as in building predictors. In addition to this, the availability of accurate genomic data and the recent implementation of CGH arrays has made mapping expression and genomic data on the chromosomes possible. There is also a clear demand for methods that allow the automatic transfer of biological information to the results of microarray experiments. Different initiatives, such as the Gene Ontology (GO) consortium, pathways databases, protein functional motifs, etc., provide curated annotations for genes. Whereas many resources on the web focus mainly on clustering methods, GEPAS has evolved to cope with the aforementioned new challenges that have recently arisen in the field of microarray data analysis. The web-based pipeline for microarray gene expression data, GEPAS, is available at http://gepas.bioinfo.cnio.es.

}, keywords = {gepas, microarray data analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15215434}, author = {Herrero, J. and Vaquerizas, J. M. and Fatima Al-Shahrour and L. Conde and A. Mateos and J. Santoyo and Diaz-Uriarte, R. and Dopazo, J.} } @article {15095013, title = {Prediction of protein function and pathways in the genome era}, journal = {Cell Mol Life Sci}, volume = {61}, number = {7-8}, year = {2004}, note = {Gabaldon, T Huynen, M A Review Switzerland Cellular and molecular life sciences : CMLS Cell Mol Life Sci. 2004 Apr;61(7-8):930-44.}, pages = {930-44}, abstract = {The growing number of completely sequenced genomes adds new dimensions to the use of sequence analysis to predict protein function. Compared with the classical knowledge transfer from one protein to a similar sequence (homology-based function prediction), knowledge about the corresponding genes in other genomes (orthology-based function prediction) provides more specific information about the protein{\textquoteright}s function, while the analysis of the sequence in its genomic context (context-based function prediction) provides information about its functional context. Whereas homology-based methods predict the molecular function of a protein, genomic context methods predict the biological process in which it plays a role. These complementary approaches can be combined to elucidate complete functional networks and biochemical pathways from the genome sequence of an organism. Here we review recent advances in the field of genomic-context based methods of protein function prediction. 
Techniques are highlighted with examples, including an analysis that combines information from genomic-context with homology to predict a role of the RNase L inhibitor in the maturation of ribosomal RNA.}, keywords = {ATP-Binding Cassette Transporters/genetics/metabolism Amino Acid Sequence Animals Artificial Gene Fusion Base Sequence Chaperonins/genetics/metabolism Chromosomes/genetics/metabolism Evolution, Molecular *Genome Genomics Humans Molecular Sequence Data Phylogeny *Proteins/classification/genetics/metabolism RNA, Ribosomal/metabolism Sequence Alignment}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15095013}, author = {Gabald{\'o}n, T. and M. A. Huynen} } @article {15576054, title = {Shaping the mitochondrial proteome}, journal = {Biochim Biophys Acta}, volume = {1659}, number = {2-3}, year = {2004}, note = {Gabaldon, Toni Huynen, Martijn A Research Support, Non-U.S. Gov{\textquoteright}t Review Netherlands Biochimica et biophysica acta Biochim Biophys Acta. 2004 Dec 6;1659(2-3):212-20.}, pages = {212-20}, abstract = {Mitochondria are eukaryotic organelles that originated from a single bacterial endosymbiosis some 2 billion years ago. The transition from the ancestral endosymbiont to the modern mitochondrion has been accompanied by major changes in its protein content, the so-called proteome. These changes included complete loss of some bacterial pathways, amelioration of others and gain of completely new complexes of eukaryotic origin such as the ATP/ADP translocase and most of the mitochondrial protein import machinery. This renewal of proteins has been so extensive that only 14-16\% of modern mitochondrial proteome has an origin that can be traced back to the bacterial endosymbiont. The rest consists of proteins of diverse origin that were eventually recruited to function in the organelle. 
This shaping of the proteome content reflects the transformation of mitochondria into a highly specialized organelle that, besides ATP production, comprises a variety of functions within the eukaryotic metabolism. Here we review recent advances in the fields of comparative genomics and proteomics that are throwing light on the origin and evolution of the mitochondrial proteome.}, keywords = {Animals Biological Transport Energy Metabolism Eukaryotic Cells/physiology *Evolution Humans Mitochondria/*physiology Phylogeny Proteome/*physiology}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=15576054}, author = {Gabald{\'o}n, T. and M. A. Huynen} } @article {18629097, title = {An approach to inferring transcriptional regulation among genes from large-scale expression data}, journal = {Comp Funct Genomics}, volume = {4}, number = {1}, year = {2003}, note = {Herrero, Javier Diaz-Uriarte, Ramon Dopazo, Joaquin Egypt Comparative and functional genomics Comp Funct Genomics. 2003;4(1):148-54.}, pages = {148-54}, abstract = {The use of DNA microarrays opens up the possibility of measuring the expression levels of thousands of genes simultaneously under different conditions. Time-course experiments allow researchers to study the dynamics of gene interactions. The inference of genetic networks from such measures can give important insights for the understanding of a variety of biological problems. Most of the existing methods for genetic network reconstruction require many experimental data points, or can only be applied to the reconstruction of small subnetworks. Here we present a method that reduces the dimensionality of the dataset and then extracts the significant dynamic correlations among genes. The method requires a number of points achievable in common time-course experiments.}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=18629097}, author = {Herrero, J. 
and Diaz-Uriarte, R. and Dopazo, J.} } @article {12695324, title = {Comparing bacterial genomes through conservation profiles}, journal = {Genome Res}, volume = {13}, number = {5}, year = {2003}, note = {Martin, Maria J Herrero, Javier Mateos, Alvaro Dopazo, Joaquin Comparative Study United States Genome research Genome Res. 2003 May;13(5):991-8. Epub 2003 Apr 14.}, pages = {991-8}, abstract = {We constructed two-dimensional representations of profiles of gene conservation across different genomes using the genome of Escherichia coli as a model. These profiles permit both the visualization at the genome level of different traits in the organism studied and, at the same time, reveal features related to the genomes analyzed (such as defective genomes or genomes that lack a particular system). Conserved genes are not uniformly distributed along the E. coli genome but tend to cluster together. The study of gene distribution patterns across genomes is important for the understanding of how sets of genes seem to be dependent on each other, probably having some functional link. This provides additional evidence that can be used for the elucidation of the function of unannotated genes. Clustering these patterns produces families of genes which can be arranged in a hierarchy of closeness. In this way, functions can be defined at different levels of generality depending on the level of the hierarchy that is studied. 
The combined study of conservation and phenotypic traits opens up the possibility of defining phenotype/genotype associations, and ultimately inferring the gene or genes responsible for a particular trait.}, keywords = {Bacterial Genotype Models, Bacterial/genetics Cluster Analysis Conserved Sequence/*genetics DNA, Bacterial/genetics Escherichia coli/classification/*genetics Evolution, Bacterial/genetics Gene Order/genetics Genes, Bacterial/genetics/physiology *Genome, Chromosome Mapping/methods Chromosomes, Genetic Phenotype Phylogeny Sequence Homology, Molecular Gene Expression Profiling/methods Gene Expression Regulation, Nucleic Acid Species Specificity Terminology as Topic}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12695324}, author = {Martin, M. J. and Herrero, J. and A. Mateos and Dopazo, J.} } @article {12576477, title = {Examining the role of glutamic acid 183 in chloroperoxidase catalysis}, journal = {J Biol Chem}, volume = {278}, number = {16}, year = {2003}, note = {Yi, Xianwen Conesa, Ana Punt, Peter J Hager, Lowell P GM 07768/GM/NIGMS NIH HHS/United States Research Support, U.S. Gov{\textquoteright}t, P.H.S. United States The Journal of biological chemistry J Biol Chem. 2003 Apr 18;278(16):13855-9. Epub 2003 Feb 7.}, pages = {13855-9}, abstract = {Site-directed mutagenesis has been used to investigate the role of glutamic acid 183 in chloroperoxidase catalysis. Based on the x-ray crystallographic structure of chloroperoxidase, Glu-183 is postulated to function on distal side of the heme prosthetic group as an acid-base catalyst in facilitating the reaction between the peroxidase and hydrogen peroxide with the formation of Compound I. In contrast, the other members of the heme peroxidase family use a histidine residue in this role. Plasmids have now been constructed in which the codon for Glu-183 is replaced with a histidine codon. 
The mutant recombinant gene has been expressed in Aspergillus niger. An analysis of the produced mutant gene shows that the substitution of Glu-183 with a His residue is detrimental to the chlorination and dismutation activity of chloroperoxidase. The activity is reduced by 85 and 50\% of wild type activity, respectively. However, quite unexpectedly, the epoxidation activity of the mutant enzyme is significantly enhanced approximately 2.5-fold. These results show that Glu-183 is important but not essential for the chlorination activity of chloroperoxidase. It is possible that the increased epoxidation of the mutant enzyme is based on an increase in the hydrophobicity of the active site.}, keywords = {Aspergillus niger/metabolism Catalase/metabolism Catalysis Chloride Peroxidase/*chemistry/*metabolism Chlorine/metabolism Chromatography, Ion Exchange Circular Dichroism Crystallography, Polyacrylamide Gel Fungi/enzymology Glutamic Acid/*chemistry Histidine/chemistry/metabolism Hydrogen-Ion Concentration Immunoblotting Isoelectric Focusing Mutation Oxidoreductases/metabolism Plasmids/metabolism, X-Ray Electrophoresis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12576477}, author = {Yi, X. and A. Conesa and Punt, P. J. and Hager, L. P.} } @article {12651726, title = {Gene expression data preprocessing}, journal = {Bioinformatics}, volume = {19}, number = {5}, year = {2003}, note = {Herrero, J Diaz-Uriarte, R Dopazo, J England Bioinformatics (Oxford, England) Bioinformatics. 2003 Mar 22;19(5):655-6.}, pages = {655-6}, abstract = {We present an interactive web tool for preprocessing microarray gene expression data. It analyses the data, suggests the most appropriate transformations and proceeds with them after user agreement. 
The normal preprocessing steps include scale transformations, management of missing values, replicate handling, flat pattern filtering and pattern standardization and they are required before performing any pattern analysis. The processed data set can be sent to other pattern analysis tools.}, keywords = {*Database Management Systems Gene Expression Profiling/*methods Information Storage and Retrieval/methods Internet Oligonucleotide Array Sequence Analysis/*methods Sequence Alignment/*methods Sequence Analysis, DNA/*methods *Software *User-Computer Interface}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12651726}, author = {Herrero, J. and Diaz-Uriarte, R. and Dopazo, J.} } @article {12824345, title = {GEPAS: A web-based resource for microarray gene expression data analysis}, journal = {Nucleic Acids Res}, volume = {31}, number = {13}, year = {2003}, note = {

Herrero, Javier Al-Shahrour, Fatima Diaz-Uriarte, Ramon Mateos, Alvaro Vaquerizas, Juan M Santoyo, Javier Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t England Nucleic acids research Nucleic Acids Res. 2003 Jul 1;31(13):3461-7.

}, pages = {3461-7}, abstract = {

We present a web-based pipeline for microarray gene expression profile analysis, GEPAS, which stands for Gene Expression Profile Analysis Suite (http://gepas.bioinfo.cnio.es). GEPAS is composed of different interconnected modules which include tools for data pre-processing, two-conditions comparison, unsupervised and supervised clustering (which include some of the most popular methods as well as home made algorithms) and several tests for differential gene expression among different classes, continuous variables or survival analysis. A multiple purpose tool for data mining, based on Gene Ontology, is also linked to the tools, which constitutes a very convenient way of analysing clustering results. On-line tutorials are available from our main web server (http://bioinfo.cnio.es).

}, keywords = {gepas, microarray data analysis}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12824345}, author = {Herrero, J. and Fatima Al-Shahrour and Diaz-Uriarte, R. and A. Mateos and Vaquerizas, J. M. and J. Santoyo and Dopazo, J.} } @article {12893934, title = {Reconstruction of the proto-mitochondrial metabolism}, journal = {Science}, volume = {301}, number = {5633}, year = {2003}, note = {Gabaldon, Toni Huynen, Martijn A Comparative Study Research Support, Non-U.S. Gov{\textquoteright}t United States Science (New York, N.Y.) Science. 2003 Aug 1;301(5633):609.}, pages = {609}, keywords = {Aerobiosis Algorithms Alphaproteobacteria/chemistry/genetics/*metabolism Amino Acids/metabolism Animals Bacterial Proteins/chemistry/*metabolism Genome Genome, Bacterial Glycerol/metabolism Humans Lipid Metabolism Mitochondria/chemistry/genetics/*metabolism Phylogeny *Proteome Symbiosis Yeasts/metabolism}, url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12893934}, author = {Gabald{\'o}n, T. and M. A. Huynen} } @inbook {492, title = {Using Gene Ontology on genome-scale studies to find significant associations of biologically relevant terms to group of genes}, booktitle = {Neural Networks for Signal Processing XIII}, year = {2003}, pages = {43-52}, publisher = {IEEE Press}, organization = {IEEE Press}, address = {New York, USA}, keywords = {babelomics}, author = {Fatima Al-Shahrour and Herrero, J. and A. Mateos and J. 
Santoyo and D{\'\i}az-Uriarte, R and Dopazo, J.} }

@article{12141992,
  title    = {Bioinformatics methods for the analysis of expression arrays: data clustering and information extraction},
  journal  = {J Biotechnol},
  volume   = {98},
  number   = {2--3},
  year     = {2002},
  month    = sep,
  annote   = {Tamames, Javier Clark, Dominic Herrero, Javier Dopazo, Joaquin Blaschke, Christian Fernandez, Jose M Oliveros, Juan C Valencia, Alfonso Review Netherlands Journal of biotechnology J Biotechnol. 2002 Sep 25;98(2-3):269-83.},
  pages    = {269--283},
  abstract = {Expression arrays facilitate the monitoring of changes in the expression patterns of large collections of genes. The analysis of expression array data has become a computationally-intensive task that requires the development of bioinformatics technology for a number of key stages in the process, such as image analysis, database storage, gene clustering and information extraction. Here, we review the current trends in each of these areas, with particular emphasis on the development of the related technology being carried out within our groups.},
  keywords = {Abstracting and Indexing as Topic/methods *Cluster Analysis *Database Management Systems Databases, Computer-Assisted/methods Information Storage and Retrieval/*methods Internet Medline National Library of Medicine (U.S.) Oligonucleotide Array Sequence Analysis/*methods United States, Genetic Gene Expression Gene Expression Profiling/*methods Image Processing},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12141992},
  author   = {Tamames, J. and Clark, D. and Herrero, J. and Dopazo, J. and Blaschke, C. and Fernandez, J. M. and Oliveros, J. C. and Valencia, A.}
}

@article{12645919,
  title    = {Combining hierarchical clustering and self-organizing maps for exploratory analysis of gene expression patterns},
  journal  = {J Proteome Res},
  volume   = {1},
  number   = {5},
  year     = {2002},
  annote   = {Herrero, Javier Dopazo, Joaquin Research Support, Non-U.S. Gov{\textquoteright}t United States Journal of proteome research J Proteome Res. 2002 Sep-Oct;1(5):467-70.},
  pages    = {467--470},
  abstract = {Self-organizing maps (SOM) constitute an alternative to classical clustering methods because of its linear run times and superior performance to deal with noisy data. Nevertheless, the clustering obtained with SOM is dependent on the relative sizes of the clusters. Here, we show how the combination of SOM with hierarchical clustering methods constitutes an excellent tool for exploratory analysis of massive data like DNA microarray expression patterns.},
  keywords = {Cluster Analysis Computational Biology/methods *Gene Expression Genes, Fungal/genetics *Genome Oligonucleotide Array Sequence Analysis/*methods Statistics as Topic/*methods Time Factors},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12645919},
  author   = {Herrero, J. and Dopazo, J.}
}

@article{12414529,
  title    = {Identification of genes involved in resistance to interferon-alpha in cutaneous {T-cell} lymphoma},
  journal  = {Am J Pathol},
  volume   = {161},
  number   = {5},
  year     = {2002},
  month    = nov,
  annote   = {Tracey, Lorraine Villuendas, Raquel Ortiz, Pablo Dopazo, Ana Spiteri, Inmaculada Lombardia, Luis Rodriguez-Peralto, Jose L Fernandez-Herrera, Jesus Hernandez, Almudena Fraga, Javier Dominguez, Orlando Herrero, Javier Alonso, Miguel A Dopazo, Joaquin Piris, Miguel A Research Support, Non-U.S. Gov{\textquoteright}t United States The American journal of pathology Am J Pathol. 2002 Nov;161(5):1825-37.},
  pages    = {1825--1837},
  abstract = {Interferon-alpha therapy has been shown to be active in the treatment of mycosis fungoides although the individual response to this therapy is unpredictable and dependent on essentially unknown factors. In an effort to better understand the molecular mechanisms of interferon-alpha resistance we have developed an interferon-alpha resistant variant from a sensitive cutaneous T-cell lymphoma cell line. We have performed expression analysis to detect genes differentially expressed between both variants using a cDNA microarray including 6386 cancer-implicated genes. The experiments showed that resistance to interferon-alpha is consistently associated with changes in the expression of a set of 39 genes, involved in signal transduction, apoptosis, transcription regulation, and cell growth. Additional studies performed confirm that STAT1 and STAT3 expression and interferon-alpha induction and activation are not altered between both variants. The gene MAL, highly overexpressed by resistant cells, was also found to be expressed by tumoral cells in a series of cutaneous T-cell lymphoma patients treated with interferon-alpha and/or photochemotherapy. MAL expression was associated with longer time to complete remission. Time-course experiments of the sensitive and resistant cells showed a differential expression of a subset of genes involved in interferon-response (1 to 4 hours), cell growth and apoptosis (24 to 48 hours.), and signal transduction.},
  keywords = {Antineoplastic Agents/*pharmacology/therapeutic use Carrier Proteins/biosynthesis/genetics DNA-Binding Proteins/biosynthesis/genetics Drug Resistance, Biological Oligonucleotide Array Sequence Analysis RNA, Cultured, Cutaneous/diagnosis/drug therapy/*genetics/metabolism *Membrane Glycoproteins Models, Interleukin-1 Reproducibility of Results STAT1 Transcription Factor STAT3 Transcription Factor Trans-Activators/biosynthesis/genetics Tumor Cells, Neoplasm Gene Expression Profiling *Gene Expression Regulation, Neoplasm/biosynthesis *Receptors, Neoplastic Humans Interferon-alpha/*pharmacology/therapeutic use Kinetics Lymphoma, T-Cell},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=12414529},
  author   = {Tracey, L. and Villuendas, R. and Ortiz, P. and Dopazo, A. and Spiteri, I. and Lombardia, L. and Rodriguez-Peralto, J. L. and Fernandez-Herrera, J. and Hernandez, A. and Fraga, J. and Dominguez, O. and Herrero, J. and Alonso, M. A. and Dopazo, J. and Piris, M. A.}
}

@incollection{557,
  title        = {Supervised Neural Networks for Clustering Conditions in {DNA} Array Data After Reducing Noise by Clustering Gene Expression Profiles},
  booktitle    = {Methods of Microarray Data Analysis {II}},
  year         = {2002},
  pages        = {91--103},
  publisher    = {Kluwer Academic Publishers},
  organization = {Kluwer Academic Publishers},
  address      = {Boston},
  doi          = {10.1007/0-306-47598-7_7},
  url          = {http://link.springer.com/10.1007/0-306-47598-7_7},
  author       = {Mateos, Alvaro and Herrero, Javier and Tamames, Javier and Dopazo, Joaquin},
  editor       = {Lin, Simon M. and Johnson, Kimberly F.}
}

@incollection{495,
  title        = {Supervised Neural Networks For Clustering Conditions In {DNA} Array Data After Reducing Noise By Clustering Gene Expression Profiles},
  booktitle    = {Microarray data analysis {II}},
  year         = {2002},
  pages        = {91--103},
  publisher    = {Kluwer Academic},
  organization = {Kluwer Academic},
  author       = {Mateos, A. and Herrero, J. and Tamames, J. and Dopazo, J.}
}

@conference{556,
  title        = {Unsupervised reduction of the dimensionality followed by supervised learning with a perceptron improves the classification of conditions in {DNA} microarray gene expression data},
  booktitle    = {Proceedings of the 12th {IEEE} Workshop on Neural Networks for Signal Processing},
  year         = {2002},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Martigny, Switzerland},
  doi          = {10.1109/NNSP.2002.1030019},
  url          = {http://ieeexplore.ieee.org/document/1030019/},
  author       = {Conde, L. and Mateos, A. and Herrero, J. and Dopazo, J.}
}

@incollection{496,
  title     = {Using perceptrons for supervised classification of {DNA} microarray samples: obtaining the optimal level of information and finding differentially expressed genes},
  booktitle = {{ICANN} 2002},
  series    = {Lecture Notes in Computer Science},
  volume    = {2415},
  year      = {2002},
  pages     = {577--582},
  publisher = {Springer},
  editor    = {Dorronsoro, J. R.},
  author    = {Mateos, A. and Herrero, J. and Dopazo, J.}
}

@article{11442348,
  title    = {Annotated draft genomic sequence from a {Streptococcus} pneumoniae type {19F} clinical isolate},
  journal  = {Microb Drug Resist},
  volume   = {7},
  number   = {2},
  year     = {2001},
  annote   = {Dopazo, J Mendoza, A Herrero, J Caldara, F Humbert, Y Friedli, L Guerrier, M Grand-Schenk, E Gandin, C de Francesco, M Polissi, A Buell, G Feger, G Garcia, E Peitsch, M Garcia-Bustos, J F United States Microbial drug resistance (Larchmont, N.Y.) Microb Drug Resist. 2001 Summer;7(2):99-125.},
  pages    = {99--125},
  abstract = {The public availability of numerous microbial genomes is enabling the analysis of bacterial biology in great detail and with an unprecedented, organism-wide and taxon-wide, broad scope. Streptococcus pneumoniae is one of the most important bacterial pathogens throughout the world. We present here sequences and functional annotations for 2.1-Mbp of pneumococcal DNA, covering more than 90\% of the total estimated size of the genome. The sequenced strain is a clinical isolate resistant to macrolides and tetracycline. It carries a type 19F capsular locus, but multilocus sequence typing for several conserved genetic loci suggests that the strain sequenced belongs to a pneumococcal lineage that most often expresses a serotype 15 capsular polysaccharide. A total of 2,046 putative open reading frames (ORFs) longer than 100 amino acids were identified (average of 1,009 bp per ORF), including all described two-component systems and aminoacyl tRNA synthetases. Comparisons to other complete, or nearly complete, bacterial genomes were made and are presented in a graphical form for all the predicted proteins.},
  keywords = {Bacterial Molecular Sequence Data Pneumococcal Infections/*microbiology Prokaryotic Cells RNA, Bacterial/chemistry/genetics Genes, Bacterial/genetics *Genome, DNA, Transfer/metabolism Streptococcus pneumoniae/*genetics},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=11442348},
  author   = {Dopazo, J. and Mendoza, A. and Herrero, J. and Caldara, F. and Humbert, Y. and Friedli, L. and Guerrier, M. and Grand-Schenk, E. and Gandin, C. and de Francesco, M. and Polissi, A. and Buell, G. and Feger, G. and Garcia, E. and Peitsch, M. and Garcia-Bustos, J. F.}
}

@article{11238068,
  title    = {A hierarchical unsupervised growing neural network for clustering gene expression patterns},
  journal  = {Bioinformatics},
  volume   = {17},
  number   = {2},
  year     = {2001},
  month    = feb,
  annote   = {Herrero, J Valencia, A Dopazo, J Research Support, Non-U.S. Gov{\textquoteright}t England Bioinformatics (Oxford, England) Bioinformatics. 2001 Feb;17(2):126-36.},
  pages    = {126--136},
  abstract = {MOTIVATION: We describe a new approach to the analysis of gene expression data coming from DNA array experiments, using an unsupervised neural network. DNA array technologies allow monitoring thousands of genes rapidly and efficiently. One of the interests of these studies is the search for correlated gene expression patterns, and this is usually achieved by clustering them. The Self-Organising Tree Algorithm, (SOTA) (Dopazo,J. and Carazo,J.M. (1997) J. Mol. Evol., 44, 226-233), is a neural network that grows adopting the topology of a binary tree. The result of the algorithm is a hierarchical cluster obtained with the accuracy and robustness of a neural network. RESULTS: SOTA clustering confers several advantages over classical hierarchical clustering methods. SOTA is a divisive method: the clustering process is performed from top to bottom, i.e. the highest hierarchical levels are resolved before going to the details of the lowest levels. The growing can be stopped at the desired hierarchical level. Moreover, a criterion to stop the growing of the tree, based on the approximate distribution of probability obtained by randomisation of the original data set, is provided. By means of this criterion, a statistical support for the definition of clusters is proposed. In addition, obtaining average gene expression patterns is a built-in feature of the algorithm. Different neurons defining the different hierarchical levels represent the averages of the gene expression patterns contained in the clusters. Since SOTA runtimes are approximately linear with the number of items to be classified, it is especially suitable for dealing with huge amounts of data. The method proposed is very general and applies to any data providing that they can be coded as a series of numbers and that a computable measure of similarity between data items can be used. AVAILABILITY: A server running the program can be found at: http://bioinfo.cnio.es/sotarray.},
  keywords = {*Algorithms Automatic Data Processing *Gene Expression Profiling *Neural Networks (Computer) *Oligonucleotide Array Sequence Analysis},
  url      = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve\&db=PubMed\&dopt=Citation\&list_uids=11238068},
  author   = {Herrero, J. and Valencia, A. and Dopazo, J.}
}