% Conference paper from the InfoScale 2007 proceedings (ACM International
% Conference Proceeding Series). The acronym in the title is brace-protected so
% that sentence-casing bibliography styles keep it uppercase. All authors use
% the unambiguous "Last, First" form. The note carries only the conference
% dates; day/month match the first conference day.
@inproceedings{a82bf8aeb1c5463b8479576a86a55e21,
title = "Acquisition of rule-based knowledge for analyzing {DNA}-binding sites in proteins",
abstract = "This study aims to analyze DNA-binding proteins via acquisition of interpretable knowledge which can accurately predict binding sites in proteins to understand DNA-protein recognition mechanism. For mining accurate and interpretable knowledge, a large-scale dataset consisting of 982 DNA-binding proteins is constructed. This study investigates a novel feature set consisting of 11 features, including solvent accessibility, secondary structure, charge information near the residue, amino acid group and neighbor property. The derived binding and non-binding rules reveal that besides the well-known solvent accessibility, the electric charge distribution near the residue and the amino acid groups also play important roles in prediction of binding sites. The interpretable and accurate knowledge is helpful for biologist to analyze DNA-binding proteins.",
keywords = "Binding site, Decision tree, Knowledge acquisition, Protein",
author = "Ho, {Shinn Jang} and Chang, {Chia Yun} and Huang, {Liang Tsung} and Hwang, {Shiow Fen} and Ho, Shinn-Ying",
year = "2007",
month = jun,
day = "6",
doi = "10.4108/infoscale.2007.972",
language = "English",
series = "ACM International Conference Proceeding Series",
publisher = "Association for Computing Machinery",
booktitle = "Proceedings of the 2nd International Conference on Scalable Information Systems, InfoScale 2007",
note = "Conference date: 6--8 June 2007",
}