<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">JDS</journal-id>
<journal-title-group><journal-title>Journal of Data Science</journal-title></journal-title-group>
<issn pub-type="epub">1683-8602</issn><issn pub-type="ppub">1680-743X</issn><issn-l>1680-743X</issn-l>
<publisher>
<publisher-name>School of Statistics, Renmin University of China</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">JDS1167</article-id>
<article-id pub-id-type="doi">10.6339/25-JDS1167</article-id>
<article-categories><subj-group subj-group-type="heading">
<subject>Data Science Reviews</subject></subj-group></article-categories>
<title-group>
<article-title>A Statistician’s Selective Review of Neural Network Modeling: Algorithms and Applications</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-3153-2662</contrib-id>
<name><surname>Zhang</surname><given-names>Chunming</given-names></name><email xlink:href="mailto:czhang3@wisc.edu">czhang3@wisc.edu</email><xref ref-type="aff" rid="j_jds1167_aff_001">1</xref><xref ref-type="corresp" rid="cor1">∗</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Zhengjun</given-names></name><xref ref-type="aff" rid="j_jds1167_aff_002">2</xref><xref ref-type="aff" rid="j_jds1167_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhong</surname><given-names>Xinrui</given-names></name><xref ref-type="aff" rid="j_jds1167_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname><given-names>Jialuo</given-names></name><xref ref-type="aff" rid="j_jds1167_aff_001">1</xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhao</surname><given-names>Zhihao</given-names></name><xref ref-type="aff" rid="j_jds1167_aff_001">1</xref>
</contrib>
<aff id="j_jds1167_aff_001"><label>1</label><institution>University of Wisconsin-Madison</institution>, Department of Statistics, Madison, Wisconsin, <country>U.S.A.</country></aff>
<aff id="j_jds1167_aff_002"><label>2</label>School of Economics and Management, and MOE Social Science Laboratory of Digital Economic Forecasts and Policy Simulation, University of Chinese Academy of Sciences, AMSS Center for Forecasting Sciences, <institution>Chinese Academy of Sciences</institution>, Beijing, <country>China</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>∗</label>Corresponding author. Email: <email xlink:href="mailto:czhang3@wisc.edu">czhang3@wisc.edu</email>.</corresp>
</author-notes>
<pub-date pub-type="ppub"><year>2025</year></pub-date><pub-date pub-type="epub"><day>20</day><month>1</month><year>2025</year></pub-date><volume>23</volume><issue>4</issue><fpage>676</fpage><lpage>694</lpage><supplementary-material id="S1" content-type="document" xlink:href="jds1167_s001.pdf" mimetype="application" mime-subtype="pdf">
<caption>
<title>Supplementary Material</title>
<p>The MATLAB implementation, including a README file, is available at <uri>https://github.com/ChunmingZhangUW/Review-NNM_JDS</uri>. The supplementary file includes Appendix A for the proof of Proposition 1 and Appendix B for numerical illustrations of <inline-formula id="j_jds1167_ineq_001"><alternatives><mml:math>
<mml:mi mathvariant="normal">LSTM</mml:mi></mml:math><tex-math><![CDATA[$\mathrm{LSTM}$]]></tex-math></alternatives></inline-formula> models in Section 5.2.</p>
</caption>
</supplementary-material><history><date date-type="received"><day>25</day><month>10</month><year>2024</year></date><date date-type="accepted"><day>8</day><month>1</month><year>2025</year></date></history>
<permissions><copyright-statement>2025 The Author(s). Published by the School of Statistics and the Center for Applied Statistics, Renmin University of China.</copyright-statement><copyright-year>2025</copyright-year>
<license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
<license-p>Open access article under the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">CC BY</ext-link> license.</license-p></license></permissions>
<abstract>
<p>Deep neural networks have a wide range of applications in data science. This paper reviews neural network modeling algorithms and their applications in both supervised and unsupervised learning. Key examples include: (i) binary classification and (ii) nonparametric regression function estimation, both implemented with feedforward neural networks (<inline-formula id="j_jds1167_ineq_002"><alternatives><mml:math>
<mml:mi mathvariant="normal">FNN</mml:mi></mml:math><tex-math><![CDATA[$\mathrm{FNN}$]]></tex-math></alternatives></inline-formula>); (iii) sequential data prediction using long short-term memory (<inline-formula id="j_jds1167_ineq_003"><alternatives><mml:math>
<mml:mi mathvariant="normal">LSTM</mml:mi></mml:math><tex-math><![CDATA[$\mathrm{LSTM}$]]></tex-math></alternatives></inline-formula>) networks; and (iv) image classification using convolutional neural networks (<inline-formula id="j_jds1167_ineq_004"><alternatives><mml:math>
<mml:mi mathvariant="normal">CNN</mml:mi></mml:math><tex-math><![CDATA[$\mathrm{CNN}$]]></tex-math></alternatives></inline-formula>). All implementations are provided in <inline-formula id="j_jds1167_ineq_005"><alternatives><mml:math>
<mml:mi mathvariant="normal">MATLAB</mml:mi></mml:math><tex-math><![CDATA[$\mathrm{MATLAB}$]]></tex-math></alternatives></inline-formula>, making these methods accessible to statisticians and data scientists to support learning and practical application.</p>
</abstract>
<kwd-group>
<label>Keywords</label>
<kwd>classification</kwd>
<kwd>nonparametric regression</kwd>
<kwd>prediction</kwd>
<kwd>time series</kwd>
</kwd-group>
<funding-group><funding-statement>C. Zhang’s work was partially supported by the U.S. National Science Foundation grants DMS-2013486 and DMS-1712418, as well as funding provided by the University of Wisconsin-Madison Office of the Vice Chancellor for Research and Graduate Education through the Wisconsin Alumni Research Foundation. Z. Zhang’s research was supported by NSFC 72442027.</funding-statement></funding-group>
</article-meta>
</front>
<back>
<ref-list id="j_jds1167_reflist_001">
<title>References</title>
<ref id="j_jds1167_ref_001">
<mixed-citation publication-type="journal"> <string-name><surname>Alzubaidi</surname> <given-names>L</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>J</given-names></string-name>, <string-name><surname>Humaidi</surname> <given-names>AJ</given-names></string-name>, <etal>et al.</etal> (<year>2021</year>). <article-title>Review of deep learning: Concepts, CNN architectures, challenges, applications, future directions</article-title>. <source><italic>Journal of Big Data</italic></source>, <volume>8</volume>(<issue>1</issue>): <fpage>53</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1186/s40537-021-00444-8" xlink:type="simple">https://doi.org/10.1186/s40537-021-00444-8</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_002">
<mixed-citation publication-type="journal"> <string-name><surname>Coleman</surname> <given-names>TF</given-names></string-name>, <string-name><surname>Li</surname> <given-names>Y</given-names></string-name> (<year>1994</year>). <article-title>On the convergence of reflective Newton methods for large-scale nonlinear minimization subject to bounds</article-title>. <source><italic>Mathematical Programming</italic></source>, <volume>67</volume>(<issue>2</issue>): <fpage>189</fpage>–<lpage>224</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/BF01582221" xlink:type="simple">https://doi.org/10.1007/BF01582221</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_003">
<mixed-citation publication-type="book"> <string-name><surname>Fan</surname> <given-names>J</given-names></string-name> (<year>2018</year>). <source><italic>Local Polynomial Modelling and Its Applications</italic></source>, <series>Monographs on Statistics and Applied Probability</series> <volume>66</volume>. <publisher-name>Routledge</publisher-name>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_004">
<mixed-citation publication-type="journal"> <string-name><surname>Farrell</surname> <given-names>MH</given-names></string-name>, <string-name><surname>Liang</surname> <given-names>T</given-names></string-name>, <string-name><surname>Misra</surname> <given-names>S</given-names></string-name> (<year>2021</year>). <article-title>Deep neural networks for estimation and inference</article-title>. <source><italic>Econometrica</italic></source>, <volume>89</volume>(<issue>1</issue>): <fpage>181</fpage>–<lpage>213</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.3982/ECTA16901" xlink:type="simple">https://doi.org/10.3982/ECTA16901</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_005">
<mixed-citation publication-type="journal"> <string-name><surname>Friedman</surname> <given-names>JH</given-names></string-name> (<year>1991</year>). <article-title>Multivariate adaptive regression splines</article-title>. <source><italic>The Annals of Statistics</italic></source>, <volume>19</volume>(<issue>1</issue>): <fpage>1</fpage>–<lpage>67</lpage>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_006">
<mixed-citation publication-type="book"> <string-name><surname>Friedman</surname> <given-names>JH</given-names></string-name>, <string-name><surname>Tibshirani</surname> <given-names>R</given-names></string-name>, <string-name><surname>Hastie</surname> <given-names>T</given-names></string-name> (<year>2001</year>). <source><italic>The Elements of Statistical Learning: Data Mining, Inference, and Prediction</italic></source>, <edition>1</edition>st ed. <series>Springer Series in Statistics</series>. <publisher-name>Springer</publisher-name>, <publisher-loc>New York</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_007">
<mixed-citation publication-type="book"> <string-name><surname>Goodfellow</surname> <given-names>I</given-names></string-name>, <string-name><surname>Bengio</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Courville</surname> <given-names>A</given-names></string-name> (<year>2016</year>). <source><italic>Deep Learning</italic></source>. <publisher-name>MIT Press</publisher-name>. <uri>http://www.deeplearningbook.org</uri>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_008">
<mixed-citation publication-type="journal"> <string-name><surname>Higham</surname> <given-names>CF</given-names></string-name>, <string-name><surname>Higham</surname> <given-names>DJ</given-names></string-name> (<year>2019</year>). <article-title>Deep learning: An introduction for applied mathematicians</article-title>. <source><italic>SIAM Review</italic></source>, <volume>61</volume>(<issue>4</issue>): <fpage>860</fpage>–<lpage>891</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1137/18M1165748" xlink:type="simple">https://doi.org/10.1137/18M1165748</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_009">
<mixed-citation publication-type="journal"> <string-name><surname>Hinton</surname> <given-names>GE</given-names></string-name> (<year>2007</year>). <article-title>Learning multiple layers of representation</article-title>. <source><italic>Trends in Cognitive Sciences</italic></source>, <volume>11</volume>(<issue>10</issue>): <fpage>428</fpage>–<lpage>434</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1016/j.tics.2007.09.004" xlink:type="simple">https://doi.org/10.1016/j.tics.2007.09.004</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_010">
<mixed-citation publication-type="journal"> <string-name><surname>Hinton</surname> <given-names>GE</given-names></string-name>, <string-name><surname>Osindero</surname> <given-names>S</given-names></string-name>, <string-name><surname>Teh</surname> <given-names>Y-W</given-names></string-name> (<year>2006</year>). <article-title>A fast learning algorithm for deep belief nets</article-title>. <source><italic>Neural Computation</italic></source>, <volume>18</volume>(<issue>7</issue>): <fpage>1527</fpage>–<lpage>1554</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1162/neco.2006.18.7.1527" xlink:type="simple">https://doi.org/10.1162/neco.2006.18.7.1527</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_011">
<mixed-citation publication-type="journal"> <string-name><surname>Jordan</surname> <given-names>MI</given-names></string-name>, <string-name><surname>Mitchell</surname> <given-names>TM</given-names></string-name> (<year>2015</year>). <article-title>Machine learning: Trends, perspectives, and prospects</article-title>. <source><italic>Science</italic></source>, <volume>349</volume>(<issue>6245</issue>): <fpage>255</fpage>–<lpage>260</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1126/science.aaa8415" xlink:type="simple">https://doi.org/10.1126/science.aaa8415</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_012">
<mixed-citation publication-type="chapter"> <string-name><surname>Katthi</surname> <given-names>JR</given-names></string-name>, <string-name><surname>Ganapathy</surname> <given-names>S</given-names></string-name>, <string-name><surname>Kothinti</surname> <given-names>S</given-names></string-name>, <string-name><surname>Slaney</surname> <given-names>M</given-names></string-name> (<year>2020</year>). <chapter-title>Deep canonical correlation analysis for decoding the auditory brain</chapter-title>. In: <source><italic>2020 42nd Annual International Conference of the IEEE Engineering in Medicine &amp; Biology Society (EMBC)</italic></source>, <fpage>3505</fpage>–<lpage>3508</lpage>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_013">
<mixed-citation publication-type="chapter"> <string-name><surname>Kingma</surname> <given-names>DP</given-names></string-name>, <string-name><surname>Ba</surname> <given-names>J</given-names></string-name> (<year>2015</year>). <chapter-title>Adam: A method for stochastic optimization</chapter-title>. In: <source><italic>3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7–9, 2015, Conference Track Proceedings</italic></source> (<string-name><given-names>Y</given-names> <surname>Bengio</surname></string-name>, <string-name><given-names>Y</given-names> <surname>LeCun</surname></string-name>, eds.). <publisher-name>ArXiv</publisher-name>, <publisher-loc>Ithaca, NY</publisher-loc>. <uri>https://hdl.handle.net/11245/1.505367</uri>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_014">
<mixed-citation publication-type="journal"> <string-name><surname>Kramer</surname> <given-names>MA</given-names></string-name> (<year>1991</year>). <article-title>Nonlinear principal component analysis using autoassociative neural networks</article-title>. <source><italic>AIChE Journal</italic></source>, <volume>37</volume>(<issue>2</issue>): <fpage>233</fpage>–<lpage>243</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1002/aic.690370209" xlink:type="simple">https://doi.org/10.1002/aic.690370209</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_015">
<mixed-citation publication-type="book"> <string-name><surname>Magnus</surname> <given-names>JR</given-names></string-name>, <string-name><surname>Neudecker</surname> <given-names>H</given-names></string-name> (<year>2019</year>). <source><italic>Matrix Differential Calculus with Applications in Statistics and Econometrics</italic></source>. <publisher-name>John Wiley &amp; Sons</publisher-name>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_016">
<mixed-citation publication-type="book"> <string-name><surname>McCullagh</surname> <given-names>P</given-names></string-name>, <string-name><surname>Nelder</surname> <given-names>J</given-names></string-name> (<year>1989</year>). <source><italic>Generalized Linear Models</italic></source>, <edition>2</edition>nd ed. <publisher-name>Chapman and Hall/CRC</publisher-name>, <publisher-loc>Boca Raton, FL</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_017">
<mixed-citation publication-type="other"> <string-name><surname>Muir</surname> <given-names>D</given-names></string-name> (<year>2024</year>). Adam stochastic gradient descent optimization. <uri>https://github.com/DylanMuir/fmin_adam</uri>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_018">
<mixed-citation publication-type="book"> <string-name><surname>Ripley</surname> <given-names>BD</given-names></string-name> (<year>1996</year>). <source><italic>Pattern Recognition and Neural Networks</italic></source>. <publisher-name>Cambridge University Press</publisher-name>, <publisher-loc>Cambridge; New York</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_019">
<mixed-citation publication-type="journal"> <string-name><surname>Schmidt-Hieber</surname> <given-names>J</given-names></string-name> (<year>2020</year>). <article-title>Nonparametric regression using deep neural networks with ReLU activation function</article-title>. <source><italic>The Annals of Statistics</italic></source>, <volume>48</volume>(<issue>4</issue>): <fpage>1875</fpage>–<lpage>1897</lpage>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_020">
<mixed-citation publication-type="journal"> <string-name><surname>Vogl</surname> <given-names>TP</given-names></string-name>, <string-name><surname>Mangis</surname> <given-names>J</given-names></string-name>, <string-name><surname>Rigler</surname> <given-names>A</given-names></string-name>, <string-name><surname>Zink</surname> <given-names>W</given-names></string-name>, <string-name><surname>Alkon</surname> <given-names>D</given-names></string-name> (<year>1988</year>). <article-title>Accelerating the convergence of the back-propagation method</article-title>. <source><italic>Biological Cybernetics</italic></source>, <volume>59</volume>: <fpage>257</fpage>–<lpage>263</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/BF00332914" xlink:type="simple">https://doi.org/10.1007/BF00332914</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_021">
<mixed-citation publication-type="book"> <string-name><surname>Wahba</surname> <given-names>G</given-names></string-name> (<year>1990</year>). <source><italic>Spline Models for Observational Data</italic></source>. <publisher-name>SIAM</publisher-name>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_022">
<mixed-citation publication-type="journal"> <string-name><surname>Zhang</surname> <given-names>C</given-names></string-name>, <string-name><surname>Zhu</surname> <given-names>L</given-names></string-name>, <string-name><surname>Shen</surname> <given-names>Y</given-names></string-name> (<year>2023</year>). <article-title>Robust estimation in regression and classification methods for large dimensional data</article-title>. <source><italic>Machine Learning</italic></source>, <volume>112</volume>(<issue>9</issue>): <fpage>3361</fpage>–<lpage>3411</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/s10994-023-06349-2" xlink:type="simple">https://doi.org/10.1007/s10994-023-06349-2</ext-link></mixed-citation>
</ref>
<ref id="j_jds1167_ref_023">
<mixed-citation publication-type="journal"> <string-name><surname>Zhang</surname> <given-names>S</given-names></string-name>, <string-name><surname>Lu</surname> <given-names>J</given-names></string-name>, <string-name><surname>Zhao</surname> <given-names>H</given-names></string-name> (<year>2024</year>). <article-title>Deep network approximation: Beyond ReLU to diverse activation functions</article-title>. <source><italic>Journal of Machine Learning Research</italic></source>, <volume>25</volume>(<issue>35</issue>): <fpage>1</fpage>–<lpage>39</lpage>.</mixed-citation>
</ref>
<ref id="j_jds1167_ref_024">
<mixed-citation publication-type="other"> <string-name><surname>Zhong</surname> <given-names>R</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>J</given-names></string-name>, <string-name><surname>Zhang</surname> <given-names>C</given-names></string-name> (<year>2024</year>). Nonlinear functional principal component analysis using neural networks. arXiv preprint: <uri>https://arxiv.org/abs/2306.14388</uri>.</mixed-citation>
</ref>
</ref-list>
</back>
</article>
