% Journal article: CWE detection via transfer learning of the CodeBERT model
% (KIPS Transactions on Software and Data Engineering, 2023).
% NOTE(review): volume/number/pages are not recorded here. The DOI suffix
% (2023.12.10.431) suggests vol. 12, no. 10, starting page 431 -- confirm
% against the publisher record before adding those fields.
@article{MED527BF5,
  author   = {Park, Chansol and Moon, So Young and Kim, R. Young Chul},
  title    = {Detecting Common Weakness Enumeration({CWE}) Based on the Transfer Learning of {CodeBERT} Model},
  journal  = {KIPS Transactions on Software and Data Engineering},
  year     = {2023},
  issn     = {2287-5905},
  doi      = {10.3745/KTSDE.2023.12.10.431},
  keywords = {Software Engineering, Code Visualization, Code Complexity, Code Weakness, Artificial Intelligence},
  abstract = {Recently the incorporation of artificial intelligence approaches in the field of software engineering has been one of the big topics. In the world, there are actively studying in two directions: 1) software engineering for artificial intelligence and 2) artificial intelligence for software engineering. We attempt to apply artificial intelligence to software engineering to identify and refactor bad code module areas. To learn the patterns of bad code elements well, we must have many datasets with bad code elements labeled correctly for artificial intelligence in this task. The current problems have insufficient datasets for learning and can not guarantee the accuracy of the datasets that we collected. To solve this problem, when collecting code data, bad code data is collected only for code module areas with high-complexity, not the entire code. We propose a method for exploring common weakness enumeration by learning the collected dataset based on transfer learning of the CodeBERT model. The CodeBERT model learns the corresponding dataset more about common weakness patterns in code. With this approach, we expect to identify common weakness patterns more accurately better than one in traditional software engineering.},
}