% Conference paper (research-in-progress) presented at ICIS 2016.
% The citation key is an opaque identifier and is kept as-is so that
% existing citations of this entry continue to resolve.
% NOTE(review): address holds a country rather than the publisher city;
% replace with the city if it can be confirmed.
@inproceedings{58894e96ed4d4bdd929df275613e245e,
  author    = {Legenzoff, Derek and Nabity, Teagen},
  title     = {Mining domain knowledge: Using functional dependencies to profile data},
  booktitle = {2016 International Conference on Information Systems, {ICIS} 2016},
  publisher = {Association for Information Systems},
  address   = {United States},
  year      = {2016},
  month     = dec,
  note      = {Conference date: 11--14 December 2016},
  language  = {English},
  keywords  = {Data cleaning, Data cleaning process, Data mining, Data quality, Domain knowledge, Error identification, Functional dependency},
  abstract  = {Poor data quality is one of the primary issues facing big data projects. Cleaning data and improving quality can be expensive and time-intensive. In data warehouse projects, data cleaning is estimated to account for 30\% to 80\% of the project's development time and budget. Data quality mining is one method used to identify errors that has become increasingly popular in the past 20 years. Our research-in-progress aims to identify multi-field errors via the mining of functional dependencies. Existing research on data quality mining and functional dependencies has focused on improving algorithms to identify a higher percentage of complex errors. The proposed process strives to introduce an efficient method for expediting error identification and increasing a user's domain knowledge in order to reduce the costs associated with cleaning; the process will also include an assessment of when further cleaning is unlikely to be cost effective.},
}