<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">JDS</journal-id>
<journal-title-group><journal-title>Journal of Data Science</journal-title></journal-title-group>
<issn pub-type="epub">1683-8602</issn><issn pub-type="ppub">1680-743X</issn><issn-l>1680-743X</issn-l>
<publisher>
<publisher-name>School of Statistics, Renmin University of China</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">JDS1111</article-id>
<article-id pub-id-type="doi">10.6339/23-JDS1111</article-id>
<article-categories><subj-group subj-group-type="heading">
<subject>Data Science in Action</subject></subj-group></article-categories>
<title-group>
<article-title>The Effects of County-Level Socioeconomic and Healthcare Factors on Controlling COVID-19 in the Southern and Southeastern United States</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Barth</surname><given-names>Jackson</given-names></name><xref ref-type="aff" rid="j_jds1111_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Cheng</surname><given-names>Guanqing</given-names></name><xref ref-type="aff" rid="j_jds1111_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Williams</surname><given-names>Webb</given-names></name><xref ref-type="aff" rid="j_jds1111_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Ming</given-names></name><xref ref-type="aff" rid="j_jds1111_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-4685-2199</contrib-id>
<name><surname>Ng</surname><given-names>Hon Keung Tony</given-names></name><email xlink:href="mailto:tng@bentley.edu">tng@bentley.edu</email><xref ref-type="aff" rid="j_jds1111_aff_002">2</xref><xref ref-type="corresp" rid="cor1">∗</xref>
</contrib>
<aff id="j_jds1111_aff_001"><label>1</label>Department of Statistical Science, <institution>Southern Methodist University</institution>, Dallas, Texas 75275, <country>USA</country></aff>
<aff id="j_jds1111_aff_002"><label>2</label>Department of Mathematical Sciences, <institution>Bentley University</institution>, Waltham, Massachusetts 02452, <country>USA</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>∗</label>Corresponding author. Email: <ext-link ext-link-type="uri" xlink:href="mailto:tng@bentley.edu">tng@bentley.edu</ext-link>.</corresp>
</author-notes>
<pub-date pub-type="ppub"><year>2024</year></pub-date><pub-date pub-type="epub"><day>5</day><month>9</month><year>2023</year></pub-date><volume>22</volume><issue>4</issue><fpage>631</fpage><lpage>646</lpage><supplementary-material id="S1" content-type="archive" xlink:href="jds1111_s001.zip" mimetype="application" mime-subtype="x-zip-compressed">
<caption>
<title>Supplementary Material</title>
<p><bold>S1. Code</bold></p>
<p>To ensure the reproducibility of the results presented in this manuscript, the following supplementary materials are provided at the GitHub archive <uri>https://github.com/chriszhangm/ASA-Data-Expo-2021</uri>: 
<list>
<list-item id="j_jds1111_li_001">
<label>•</label>
<p><monospace>Data_clean.R</monospace>: The R code for data cleaning;</p>
</list-item>
<list-item id="j_jds1111_li_002">
<label>•</label>
<p><monospace>modeling.R</monospace>: The R functions to show the results in our paper and on the R shiny website;</p>
</list-item>
<list-item id="j_jds1111_li_003">
<label>•</label>
<p><monospace>app.R</monospace>: The R code to run the R shiny website;</p>
</list-item>
<list-item id="j_jds1111_li_004">
<label>•</label>
<p><monospace>full_data.csv</monospace>: The full data set, which includes the two response variables (score_infection, score_death) and the socioeconomic and healthcare factors;</p>
</list-item>
<list-item id="j_jds1111_li_005">
<label>•</label>
<p><monospace>counties_prj.csv &amp; states_SE.csv</monospace>: Two datasets for producing geographic graphs in the R shiny website.</p>
</list-item>
</list>
</p>
</caption>
</supplementary-material><history><date date-type="received"><day>30</day><month>11</month><year>2022</year></date><date date-type="accepted"><day>10</day><month>7</month><year>2023</year></date></history>
<permissions><copyright-statement>2024 The Author(s). Published by the School of Statistics and the Center for Applied Statistics, Renmin University of China.</copyright-statement><copyright-year>2024</copyright-year>
<license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>Open access article under the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">CC BY</ext-link> license.</license-p></license></permissions>
<abstract>
<p>This paper aims to determine the effects of socioeconomic and healthcare factors on the performance of controlling COVID-19 in both the Southern and Southeastern United States. This analysis will provide government agencies with information to determine what communities need additional COVID-19 assistance, to identify counties that effectively control COVID-19, and to apply effective strategies on a broader scale. The statistical analysis uses data from 328 counties with a population of more than 65,000 from 13 states. We define a new response variable by considering infection and mortality rates to capture how well each county controls COVID-19. We collect 14 factors from the 2019 American Community Survey Single-Year Estimates and obtain county-level infection and mortality rates from <ext-link ext-link-type="uri" xlink:href="http://USAfacts.org">USAfacts.org</ext-link>. We use the least absolute shrinkage and selection operator (LASSO) regression to fit a multiple linear regression model and develop an interactive system programmed in R shiny to deliver all results. The interactive system at <uri>https://asa-competition-smu.shinyapps.io/COVID19/</uri> provides many options for users to explore our data, models, and results.</p>
</abstract>
<kwd-group>
<label>Keywords</label>
<kwd>American Community Survey</kwd>
<kwd>interactive system</kwd>
<kwd>LASSO regression</kwd>
<kwd>R shiny</kwd>
</kwd-group>
</article-meta>
</front>
<back>
<ref-list id="j_jds1111_reflist_001">
<title>References</title>
<ref id="j_jds1111_ref_001">
<mixed-citation publication-type="journal"> <string-name><surname>Abedi</surname> <given-names>V</given-names></string-name>, <string-name><surname>Olulana</surname> <given-names>O</given-names></string-name>, <string-name><surname>Avula</surname> <given-names>V</given-names></string-name>, <string-name><surname>Chaudhary</surname> <given-names>D</given-names></string-name>, <string-name><surname>Khan</surname> <given-names>A</given-names></string-name>, <string-name><surname>Shahjouei</surname> <given-names>S</given-names></string-name>, <etal>et al.</etal> (<year>2021</year>). <article-title>Racial, economic, and health inequality and COVID-19 infection in the United States</article-title>. <source><italic>Journal of Racial and Ethnic Health Disparities</italic></source>, <volume>8</volume>: <fpage>732</fpage>–<lpage>742</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/s40615-020-00833-4" xlink:type="simple">https://doi.org/10.1007/s40615-020-00833-4</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_002">
<mixed-citation publication-type="journal"> <string-name><surname>Bilinski</surname> <given-names>A</given-names></string-name>, <string-name><surname>Emanuel</surname> <given-names>EJ</given-names></string-name> (<year>2020</year>). <article-title>COVID-19 and excess all-cause mortality in the US and 18 comparison countries</article-title>. <source><italic>JAMA: The Journal of the American Medical Association</italic></source>, <volume>324</volume>(<issue>20</issue>): <fpage>2100</fpage>–<lpage>2102</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1001/jama.2020.20717" xlink:type="simple">https://doi.org/10.1001/jama.2020.20717</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_003">
<mixed-citation publication-type="other"> <string-name><surname>Chang</surname> <given-names>W</given-names></string-name>, <string-name><surname>Cheng</surname> <given-names>J</given-names></string-name>, <string-name><surname>Allaire</surname> <given-names>J</given-names></string-name>, <string-name><surname>Sievert</surname> <given-names>C</given-names></string-name>, <string-name><surname>Schloerke</surname> <given-names>B</given-names></string-name>, <string-name><surname>Xie</surname> <given-names>Y</given-names></string-name>, <etal>et al.</etal> (<year>2022</year>). <source><italic>shiny: Web Application Framework for R</italic></source>. R package version 1.7.3.9001.</mixed-citation>
</ref>
<ref id="j_jds1111_ref_004">
<mixed-citation publication-type="journal"> <string-name><surname>Cheng</surname> <given-names>KJG</given-names></string-name>, <string-name><surname>Sun</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Monnat</surname> <given-names>SM</given-names></string-name> (<year>2020</year>). <article-title>COVID-19 death rates are higher in rural counties with larger shares of Blacks and Hispanics</article-title>. <source><italic>The Journal of Rural Health</italic></source>, <volume>36</volume>(<issue>4</issue>): <fpage>602</fpage>–<lpage>608</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1111/jrh.12511" xlink:type="simple">https://doi.org/10.1111/jrh.12511</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_005">
<mixed-citation publication-type="journal"> <string-name><surname>Clouston</surname> <given-names>SA</given-names></string-name>, <string-name><surname>Natale</surname> <given-names>G</given-names></string-name>, <string-name><surname>Link</surname> <given-names>BG</given-names></string-name> (<year>2021</year>). <article-title>Socioeconomic inequalities in the spread of coronavirus-19 in the United States: A examination of the emergence of social inequalities</article-title>. <source><italic>Social Science &amp; Medicine</italic></source>, <volume>268</volume>: <fpage>113554</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1016/j.socscimed.2020.113554" xlink:type="simple">https://doi.org/10.1016/j.socscimed.2020.113554</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_006">
<mixed-citation publication-type="journal"> <string-name><surname>Desmet</surname> <given-names>K</given-names></string-name>, <string-name><surname>Wacziarg</surname> <given-names>R</given-names></string-name> (<year>2022</year>). <article-title>JUE insight: Understanding spatial variation in COVID-19 across the United States</article-title>. <source><italic>Journal of Urban Economics</italic></source>, <volume>127</volume>: <fpage>103332</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1016/j.jue.2021.103332" xlink:type="simple">https://doi.org/10.1016/j.jue.2021.103332</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_007">
<mixed-citation publication-type="journal"> <string-name><surname>Doti</surname> <given-names>JL</given-names></string-name> (<year>2021</year>). <article-title>Examining the impact of socioeconomic variables on COVID-19 death rates at the state level</article-title>. <source><italic>Journal of Bioeconomics</italic></source>, <volume>23</volume>(<issue>1</issue>): <fpage>15</fpage>–<lpage>53</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/s10818-021-09309-9" xlink:type="simple">https://doi.org/10.1007/s10818-021-09309-9</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_008">
<mixed-citation publication-type="journal"> <string-name><surname>Flanagan</surname> <given-names>BE</given-names></string-name>, <string-name><surname>Gregory</surname> <given-names>EW</given-names></string-name>, <string-name><surname>Hallisey</surname> <given-names>EJ</given-names></string-name>, <string-name><surname>Heitgerd</surname> <given-names>JL</given-names></string-name>, <string-name><surname>Lewis</surname> <given-names>B</given-names></string-name> (<year>2011</year>). <article-title>A social vulnerability index for disaster management</article-title>. <source><italic>Journal of Homeland Security and Emergency Management</italic></source>, <volume>8</volume>(<issue>1</issue>), <fpage>Article 3</fpage>.</mixed-citation>
</ref>
<ref id="j_jds1111_ref_009">
<mixed-citation publication-type="journal"> <string-name><surname>Friedman</surname> <given-names>J</given-names></string-name>, <string-name><surname>Hastie</surname> <given-names>T</given-names></string-name>, <string-name><surname>Tibshirani</surname> <given-names>R</given-names></string-name> (<year>2010</year>). <article-title>Regularization paths for generalized linear models via coordinate descent</article-title>. <source><italic>Journal of Statistical Software</italic></source>, <volume>33</volume>(<issue>1</issue>): <fpage>1</fpage>–<lpage>22</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.18637/jss.v033.i01" xlink:type="simple">https://doi.org/10.18637/jss.v033.i01</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_010">
<mixed-citation publication-type="journal"> <string-name><surname>Haeder</surname> <given-names>SF</given-names></string-name>, <string-name><surname>Gollust</surname> <given-names>SE</given-names></string-name> (<year>2020</year>). <article-title>From poor to worse: Health policy and politics scholars' assessment of the U.S. COVID-19 response and its implications</article-title>. <source><italic>World Medical and Health Policy</italic></source>, <volume>12</volume>(<issue>4</issue>): <fpage>454</fpage>–<lpage>481</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1002/wmh3.371" xlink:type="simple">https://doi.org/10.1002/wmh3.371</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_011">
<mixed-citation publication-type="book"> <string-name><surname>Hastie</surname> <given-names>T</given-names></string-name>, <string-name><surname>Tibshirani</surname> <given-names>R</given-names></string-name>, <string-name><surname>Friedman</surname> <given-names>J</given-names></string-name> (<year>2009</year>). <source><italic>The Elements of Statistical Learning</italic></source>, <comment>chapter 7</comment>, <fpage>241</fpage>–<lpage>243</lpage>. <publisher-name>Springer</publisher-name>, <publisher-loc>New York</publisher-loc>, <edition>second</edition> edition.</mixed-citation>
</ref>
<ref id="j_jds1111_ref_012">
<mixed-citation publication-type="journal"> <string-name><surname>Karmakar</surname> <given-names>M</given-names></string-name>, <string-name><surname>Lantz</surname> <given-names>PM</given-names></string-name>, <string-name><surname>Tipirneni</surname> <given-names>R</given-names></string-name> (<year>2021</year>). <article-title>Association of social and demographic factors with COVID-19 incidence and death rates in the US</article-title>. <source><italic>JAMA Network Open</italic></source>, <volume>4</volume>(<issue>1</issue>): <fpage>e2036462</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1001/jamanetworkopen.2020.36462" xlink:type="simple">https://doi.org/10.1001/jamanetworkopen.2020.36462</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_013">
<mixed-citation publication-type="journal"> <string-name><surname>McLaren</surname> <given-names>J</given-names></string-name> (<year>2021</year>). <article-title>Racial disparity in COVID-19 deaths: Seeking economic roots with census data</article-title>. <source><italic>The B.E. Journal of Economic Analysis &amp; Policy</italic></source>, <volume>21</volume>(<issue>3</issue>): <fpage>897</fpage>–<lpage>919</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1515/bejeap-2020-0371" xlink:type="simple">https://doi.org/10.1515/bejeap-2020-0371</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_014">
<mixed-citation publication-type="journal"> <string-name><surname>Mollalo</surname> <given-names>A</given-names></string-name>, <string-name><surname>Vahedi</surname> <given-names>B</given-names></string-name>, <string-name><surname>Rivera</surname> <given-names>KM</given-names></string-name> (<year>2020</year>). <article-title>GIS-based spatial modeling of COVID-19 incidence rate in the continental United States</article-title>. <source><italic>Science of the Total Environment</italic></source>, <volume>728</volume>: <fpage>138884</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1016/j.scitotenv.2020.138884" xlink:type="simple">https://doi.org/10.1016/j.scitotenv.2020.138884</ext-link></mixed-citation>
</ref>
<ref id="j_jds1111_ref_015">
<mixed-citation publication-type="book"> <collab>R Core Team</collab> (<year>2022</year>). <source><italic>R: A Language and Environment for Statistical Computing</italic></source>. <publisher-name>R Foundation for Statistical Computing</publisher-name>, <publisher-loc>Vienna, Austria</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_jds1111_ref_016">
<mixed-citation publication-type="journal"> <string-name><surname>Tibshirani</surname> <given-names>R</given-names></string-name> (<year>1996</year>). <article-title>Regression shrinkage and selection via the lasso</article-title>. <source><italic>Journal of the Royal Statistical Society: Series B (Methodological)</italic></source>, <volume>58</volume>(<issue>1</issue>): <fpage>267</fpage>–<lpage>288</lpage>.</mixed-citation>
</ref>
<ref id="j_jds1111_ref_017">
<mixed-citation publication-type="book"> <string-name><surname>Ward</surname> <given-names>MD</given-names></string-name>, <string-name><surname>Gleditsch</surname> <given-names>KS</given-names></string-name> (<year>2018</year>). <source><italic>Spatial Regression Models</italic></source>, volume <volume>155</volume>. <publisher-name>Sage Publications</publisher-name>.</mixed-citation>
</ref>
<ref id="j_jds1111_ref_018">
<mixed-citation publication-type="journal"> <string-name><surname>Woolf</surname> <given-names>SH</given-names></string-name>, <string-name><surname>Chapman</surname> <given-names>DA</given-names></string-name>, <string-name><surname>Lee</surname> <given-names>JH</given-names></string-name> (<year>2021</year>). <article-title>COVID-19 as the leading cause of death in the United States</article-title>. <source><italic>JAMA: The Journal of the American Medical Association</italic></source>, <volume>325</volume>(<issue>2</issue>): <fpage>123</fpage>–<lpage>124</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1001/jama.2020.24865" xlink:type="simple">https://doi.org/10.1001/jama.2020.24865</ext-link></mixed-citation>
</ref>
</ref-list>
</back>
</article>
