% Journal article: Choi and Park (2019), KIPS Transactions on Software and Data Engineering.
% Authors are stored in "Last, First" form and joined with " and " so BibTeX/Biber
% parse them as two separate people.
% NOTE(review): the DOI suffix (2019.8.10.397) suggests volume 8, number 10, first page 397;
% confirm against the publisher record before adding volume/number/pages fields.
@article{M588F7D25,
  author   = {Choi, Jonghwan and Park, Sanghyun},
  title    = {A {Node2Vec}-Based Gene Expression Image Representation Method for Effectively Predicting Cancer Prognosis},
  journal  = {KIPS Transactions on Software and Data Engineering},
  year     = {2019},
  issn     = {2287-5905},
  doi      = {10.3745/KTSDE.2019.8.10.397},
  keywords = {Bioinformatics, Gene Expression, Node2Vec, Cancer Prognostic Prediction, Personalized Medicine},
  abstract = {Accurately predicting cancer prognosis to provide appropriate treatment strategies for patients is one of the critical challenges in bioinformatics. Many researches have suggested machine learning models to predict patients' outcomes based on their gene expression data. Gene expression data is high-dimensional numerical data containing about 17,000 genes, so traditional researches used feature selection or dimensionality reduction approaches to elevate the performance of prognostic prediction models. These approaches, however, have an issue of making it difficult for the predictive models to grasp any biological interaction between the selected genes because feature selection and model training stages are performed independently. In this paper, we propose a novel two-dimensional image formatting approach for gene expression data to achieve feature selection and prognostic prediction effectively. Node2Vec is exploited to integrate biological interaction network and gene expression data and a convolutional neural network learns the integrated two-dimensional gene expression image data and predicts cancer prognosis. We evaluated our proposed model through double cross-validation and confirmed superior prognostic prediction accuracy to traditional machine learning models based on raw gene expression data. As our proposed approach is able to improve prediction models without loss of information caused by feature selection steps, we expect this will contribute to development of personalized medicine.},
}