% Chapter (pp. 101--132) in "Research Methodology in Strategy and Management".
% Typed as incollection because the entry carries both a chapter title and a
% separate booktitle. The title is stored in Title Case so the style decides casing.
% NOTE(review): address holds a country, not a publisher city, and series repeats
% booktitle -- confirm city and volume number against the publisher record.
@incollection{8de8a39267774c02a3f4e4b88b239ef5,
author = "Hyde, Steven J. and Bachura, Eric and Harrison, Joseph S.",
title = "Garbage In, Garbage Out: A Theory-Driven Approach to Improve Data Handling in Supervised Machine Learning",
booktitle = "Research Methodology in Strategy and Management",
series = "Research Methodology in Strategy and Management",
publisher = "Emerald Publishing",
address = "United Kingdom",
year = "2023",
month = jan,
day = "18",
pages = "101--132",
doi = "10.1108/S1479-838720220000014006",
language = "English",
keywords = "content analysis, data handling, Machine learning, neural network, random forest, support vector machine",
note = "Publisher Copyright: {\textcopyright} 2023 by Emerald Publishing Limited.",
abstract = "Machine learning (ML) has recently gained momentum as a method for measurement in strategy research. Yet, little guidance exists regarding how to appropriately apply the method for this purpose in our discipline. We address this by offering a guide to the application of ML in strategy research, with a particular emphasis on data handling practices that should improve our ability to accurately measure our constructs of interest using ML techniques. We offer a brief overview of ML methodologies that can be used for measurement before describing key challenges that exist when applying those methods for this purpose in strategy research (i.e., sample sizes, data noise, and construct complexity). We then outline a theory-driven approach to help scholars overcome these challenges and improve data handling and the subsequent application of ML techniques in strategy research. We demonstrate the efficacy of our approach by applying it to create a linguistic measure of CEOs{\textquoteright} motivational needs in a sample of S\&P 500 firms. We conclude by describing steps scholars can take after creating ML-based measures to continue to improve the application of ML in strategy research.",
}