2015
Delgado, Maria Soledad; Morán, Federico; Mora, Antonio; Merelo, Juan Julián; Briones, Carlos
A novel representation of genomic sequences for taxonomic clustering and visualization by means of self-organizing maps Journal Article
In: Bioinformatics, vol. 31, no. 5, pp. 736-744, 2015, ISSN: 1367-4803.
Abstract | Links | BibTeX | Tags: Clustering, Distance Measures, Growing cell structures, Self-organizing map
% Journal article (Bioinformatics, vol. 31, no. 5, 2015): numeric encoding of aligned genomic sequences for SOM-based clustering.
@article{10.1093/bioinformatics/btu708,
  title = {A novel representation of genomic sequences for taxonomic clustering and visualization by means of self-organizing maps},
  author = {Delgado, Maria Soledad and Morán, Federico and Mora, Antonio and Merelo, Juan Julián and Briones, Carlos},
  url = {https://doi.org/10.1093/bioinformatics/btu708},
  doi = {10.1093/bioinformatics/btu708},
  issn = {1367-4803},
  year = {2015},
  date = {2015-03-01},
  urldate = {2015-03-01},
  journal = {Bioinformatics},
  volume = {31},
  number = {5},
  pages = {736--744},
  abstract = {Motivation: Self-organizing maps (SOMs) are readily available bioinformatics methods for clustering and visualizing high-dimensional data, provided that such biological information is previously transformed to fixed-size, metric-based vectors. To increase the usefulness of SOM-based approaches for the analysis of genomic sequence data, novel representation methods are required that automatically and bijectively transform aligned nucleotide sequences into numeric vectors, dealing with both nucleotide ambiguity and gaps derived from sequence alignment. Results: Six different codification variants based on Euclidean space, just like SOM processing, have been tested using two SOM models: the classical Kohonen’s SOM and growing cell structures. They have been applied to two different sets of sequences: 32 sequences of small sub-unit ribosomal RNA from organisms belonging to the three domains of life, and 44 sequences of the reverse transcriptase region of the pol gene of human immunodeficiency virus type 1 belonging to different groups and sub-types. Our results show that the most important factor affecting the accuracy of sequence clustering is the assignment of an extra weight to the presence of alignment-derived gaps. Although each of the codification variants shows a different level of taxonomic consistency, the results are in agreement with sequence-based phylogenetic reconstructions and anticipate a broad applicability of this codification method. Contact: sole@eui.upm.es Supplementary information: Supplementary Data are available at Bioinformatics online.},
  keywords = {Clustering, Distance Measures, Growing cell structures, Self-organizing map},
  pubstate = {published},
  tppubtype = {article}
}
2013
Bello-Orgaz, Gema; Camacho, David
Comparative study of text clustering techniques in virtual worlds Conference
Proceedings of the 3rd International Conference on Web Intelligence, Mining and Semantics, WIMS '13, ACM Press, Madrid, Spain, 2013, ISBN: 978-1-4503-1850-1.
Links | BibTeX | Tags: Behavioral Patterns, Clustering, Distance Measures, Mahout Library, Text Clustering, Virtual World
% Conference paper (WIMS '13, Madrid, 2013): comparison of text clustering techniques in virtual worlds.
% NOTE(review): pages = {8} may be a page count rather than a page range -- confirm against the publisher record.
@conference{Bello-Orgaz:2013:CST:2479787.2479818,
  title = {Comparative study of text clustering techniques in virtual worlds},
  author = {Bello-Orgaz, Gema and Camacho, David},
  url = {http://dx.doi.org/10.1145/2479787.2479818},
  doi = {10.1145/2479787.2479818},
  isbn = {978-1-4503-1850-1},
  year = {2013},
  date = {2013-06-12},
  urldate = {2013-06-12},
  booktitle = {Proceedings of the 3rd International Conference on Web Intelligence, Mining and Semantics},
  pages = {8},
  publisher = {ACM Press},
  address = {Madrid, Spain},
  series = {WIMS '13},
  keywords = {Behavioral Patterns, Clustering, Distance Measures, Mahout Library, Text Clustering, Virtual World},
  pubstate = {published},
  tppubtype = {conference}
}