mwd code and figs

2019-11-18 10:29:19 -06:00
parent 238119d740
commit 3badc8392e
3 changed files with 169 additions and 33 deletions
--- a/mwd_gpu/figs/mwdInputOutput.png
+++ b/mwd_gpu/figs/mwdInputOutput.png
--- a/mwd_gpu/mwd.tar.gz
+++ b/mwd_gpu/mwd.tar.gz
--- a/mwd_gpu/mwdGpu.tex
+++ b/mwd_gpu/mwdGpu.tex
@@ -5,48 +5,27 @@
 \usepackage{textcomp}
 \usepackage{epsfig}
 \usepackage{hyperref}
+\usepackage{hyphenat}
 \usepackage[noabbrev, capitalize]{cleveref} % hyperref must be loaded first
 \usepackage[
-  detect-weight=true,
-  per=slash,
-  detect-family=true,
-  separate-uncertainty=true]{siunitx}
-% \usepackage{listings}
+detect-weight=true,
+per=slash,
+detect-family=true,
+separate-uncertainty=true]{siunitx}
+
 \usepackage[dvipsnames]{xcolor}
 \usepackage{upquote}

 \usepackage[framemethod=tikz]{mdframed}
 \usepackage{adjustbox}
-
-% \definecolor{greybg}{rgb}{0.25,0.25,0.25}
-% \definecolor{yellowbg}{rgb}{0.91, 0.84, 0.42}
-% \definecolor{bananamania}{rgb}{0.98, 0.91, 0.71}
-
-\mdfdefinestyle{warning}{%
-  linecolor=red!70,
-  frametitle={Warning},
-  frametitlerule=true,
-  frametitlebackgroundcolor=orange!40,
-  backgroundcolor=orange!30,
-  innertopmargin=\topskip,
-  roundcorner=8pt,
-  linewidth=1pt,
+\usepackage{listings}
+\usepackage{xparse}
+\NewDocumentCommand{\codeword}{v}{%
+  \texttt{\textcolor{blue}{#1}}%
 }
-% \mdtheorem[style=theoremstyle]{warning}{Warning}
+\lstset{language=C++,keywordstyle={\bfseries \color{blue}}}

-\mdfdefinestyle{listing}{%
-  linecolor=Aquamarine!50,
-  linewidth=1pt,
-  backgroundcolor=yellow!40,
-  roundcorner=8pt,
-  % frametitlerule=true,
-  % frametitlebackgroundcolor=yellow!50,
-  innertopmargin=\topskip,
-}
-% \mdtheorem[style=listing]{listing}{Listing}
-
-% \DeclareSIUnit\eVperc{\eV\per\clight}
-% \DeclareSIUnit\clight{\text{\ensuremath{c}}}
+\usepackage{forest}

 \begin{document}
 \title{Moving window decomposition implementation using GPU}
@@ -54,4 +33,161 @@
 \date{\today}
 \maketitle

+\section{Introduction}%
+\label{sec:introduction}
+The purpose of this work is to assess the feasibility and performance of using
+a GPU to process raw waveforms from an HPGe detector. The expected result of the
+MWD algorithm is shown in \cref{fig:mwdInputOutput}. The input is a
+\num{250000}-sample long waveform taken at the preamplifier output of the HPGe
+detector. The MWD algorithm transforms each jump in the input waveform into a
+flat-top peak, whose height is proportional to the charge deposited in the
+detector.
+
+\begin{figure}[tbp]
+  \centering
+  \includegraphics[width=0.90\linewidth]{figs/mwdInputOutput}
+  \caption{MWD algorithm: input waveform on the left, and expected output on
+    the right.}%
+  \label{fig:mwdInputOutput}
+\end{figure}
+
+\section{Setup}%
+\label{sec:set_up}
+\subsection{Hardware}%
+\label{sub:hardware}
+There are two consumer computers used in this study:
+\begin{itemize}
+  \item PC 1\@:
+  \begin{itemize}
+    \item CPU\@: AMD Ryzen 5 2400G, running at \SI{3.60}{\giga\hertz},
+      maximum frequency \SI{3.90}{\giga\hertz}
+    \item GPU\@: GeForce GTX 1060, GDDR5 memory \SI{6}{GB},
+      maximum frequency \SI{1.70}{\giga\hertz}
+  \end{itemize}
+  \item PC 2\@:
+  \begin{itemize}
+    \item CPU\@: Intel Core i5\hyp{}4590 CPU, running at \SI{3.30}{\giga\hertz},
+      maximum frequency \SI{3.70}{\giga\hertz}
+    \item GPU\@: GeForce GTX 1650, GDDR5 memory \SI{4}{GB},
+      maximum frequency \SI{1.70}{\giga\hertz}
+  \end{itemize}
+\end{itemize}
+
+\subsection{Software}%
+\label{sub:software}
+The computers run two different Linux distributions:
+\begin{itemize}
+  \item PC 1\@: CentOS 7.2
+    \begin{itemize}
+      \item gcc
+      \item CUDA
+    \end{itemize}
+  \item PC 2\@: Debian 10.2
+    \begin{itemize}
+      \item gcc (Debian 8.3.0\hyp{}6) 8.3.0
+      \item nvcc V9.2.148, CUDA 10.1, driver version 418.74
+    \end{itemize}
+\end{itemize}
+
+\section{Implementations}%
+\label{sec:implementations}
+There are two implementations of the MWD algorithm:
+\begin{itemize}
+  \item C++ implementation: does all calculations on the CPU. This will be
+    used both to verify the accuracy of the other code and as a benchmark.
+  \item CUDA implementation: offloads the digital pulse processing part onto
+    the GPU; the CPU only handles input/output related tasks.
+\end{itemize}
+
+\subsection{C++ code}%
+\label{sub:c_code}
+This implementation uses a raw array wrapped in a \codeword{struct} to
+represent waveforms; pointers are managed manually. Three methods,
+\codeword{Deconvolute}, \codeword{OffsetDifferentiate}, and
+\codeword{MovingAverage}, correspond to the 3 stages of the MWD algorithm.
+
+The related files are:
+
+\begin{forest}
+  for tree={
+    font=\ttfamily,
+    grow'=0,
+    child anchor=west,
+    parent anchor=south,
+    anchor=west,
+    calign=first,
+    edge path={
+      \noexpand\path [draw, \forestoption{edge}]
+      (!u.south west) +(7.5pt,0) |- node[fill,inner sep=1.25pt] {} (.child anchor)\forestoption{edge label};
+    },
+    before typesetting nodes={
+      if n=1
+        {insert before={[,phantom]}}
+        {}
+    },
+    fit=band,
+    before computing xy={l=15pt},
+  }
+  [mwd
+    [mwd.c]
+    [srcs
+      [vector.h]
+      [vector.c]
+      [algo.h]
+      [algo.c]
+    ]
+  ]
+\end{forest}
+
+\subsection{CUDA code}%
+\label{sub:cuda_code}
+The CUDA code implements 3 GPU functions, \codeword{gpuDeconvolute},
+\codeword{gpuOffsetDifferentiate}, and \codeword{gpuMovingAverage}, which
+replace the 3 C++ methods in the other implementation. There are also helpers
+for moving data between main memory and GPU memory, error checking, and
+timekeeping. The I/O part is the same as in the C++ version.
+
+The related files are:
+
+\begin{forest}
+  for tree={
+    font=\ttfamily,
+    grow'=0,
+    child anchor=west,
+    parent anchor=south,
+    anchor=west,
+    calign=first,
+    edge path={
+      \noexpand\path [draw, \forestoption{edge}]
+      (!u.south west) +(7.5pt,0) |- node[fill,inner sep=1.25pt] {} (.child anchor)\forestoption{edge label};
+    },
+    before typesetting nodes={
+      if n=1
+        {insert before={[,phantom]}}
+        {}
+    },
+    fit=band,
+    before computing xy={l=15pt},
+  }
+  [mwd
+  [gmwd.cu]
+    [srcs
+      [gpuAlgo.cu]
+      [gpuAlgo.h]
+      [gpuTimer.h]
+      [gpuUtils.h]
+      [prefixScan.cu]
+      [prefixScan.h]
+    ]
+  ]
+\end{forest}
+
+\section{Results}%
+\label{sec:results}
+
+\section{Code}%
+\label{sec:code}
+A tarball of the code is attached as \codeword{mwd.tar.gz}.
+
+
 \end{document}