diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb
index 60df211..7b994f5 100644
--- a/stackoverflow-survey.ipynb
+++ b/stackoverflow-survey.ipynb
@@ -74,14 +74,14 @@
    "source": [
     "# draw count plot of developers based on age\n",
     "\n",
-    "def visualize_devs(df, lang, key='Age'):\n",
+    "def visualize_devs(df, title, key='Age'):\n",
     "    '''\n",
     "    Draws count plot of developers based on attributes.\n",
     "\n",
     "    inputs:\n",
-    "        df:   a DataFrame, the subset of the data set.\n",
-    "        lang: string, the programming language, used for labeling.\n",
-    "        key:  string, the attribute to count (age).\n",
+    "        df:    a DataFrame, the subset of the data set.\n",
+    "        title: string, title of the chart.\n",
+    "        key:   string, the attribute to count (age).\n",
     "    outputs:\n",
     "        no return values, will draw and save a graphic.\n",
     "    '''\n",
@@ -94,9 +94,8 @@
     "              '45-54 years old', '55-64 years old',  \\\n",
     "              '65 years or older', 'Prefer not to say']\n",
     "    sb.countplot(x=key, data=df, order=order)\n",
-    "    title='Ages of %s Programmers' % lang\n",
     "    plt.title(title)\n",
-    "    filename= 'images/%s-of-%s-programmers.png' % (key, lang)\n",
+    "    filename= 'images/%s.png' % title.replace(\" \", \"-\")\n",
     "    plt.savefig(filename, bbox_inches='tight')\n",
     "\n",
     "\n",
@@ -153,12 +152,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "visualize_devs( get_c_devs(so_df) , 'C')\n",
-    "visualize_devs( get_c_devs(so_df, lang='Python') , 'Python')\n",
+    "visualize_devs( get_c_devs(so_df) , 'Ages of C Programmers')\n",
+    "visualize_devs( get_c_devs(so_df, lang='Python') , 'Ages of Python Programmers')\n",
     "\n",
     "for lang in ['Cobol', 'Prolog', 'Ada']:\n",
+    "    title = 'Ages of %s Programmers' % lang\n",
     "    foo = get_lang_devs(so_df, lang)\n",
-    "    visualize_devs(foo, lang)"
+    "    visualize_devs(foo, title)"
    ]
   },
   {
@@ -168,7 +168,19 @@
    "source": [
     "## Preparing the Data\n",
     "\n",
-    "`__init__()` specifies which rows to omit and which to use, so the data for modeling doesn't look like a shotgun blast of rainbow colors."
+    "`__init__()` specifies which rows to omit and which to use, so the data for modeling doesn't look like a shotgun blast of rainbow colors.\n",
+    "\n",
+    "### NaNs are dropped\n",
+    "\n",
+    "No values are imputed in place of NaN.\n",
+    "\n",
+    "Rows are dropped for developers who:\n",
+    "* did not specify their years of professional experience, or\n",
+    "* did not disclose an annual compensation.\n",
+    "\n",
+    "Roughly half of the developers who fit the criteria for analysis (42% for Python, 54% for C) did not specify their annual compensation. This analysis makes no assumptions about the reason.\n",
+    "\n",
+    "The age distribution is similar between those who specified annual compensation and those who declined to do so. This suggests that dropping the rows with missing data does not significantly skew the analysis.\n"
    ]
   },
   {
@@ -187,7 +199,7 @@
     "\n",
     "# still haven't come up with a name\n",
     "class Foo:\n",
-    "    def __init__(self, dataset, language, jobs=None, \n",
+    "    def __init__(self, df, language, jobs=None, \n",
     "                 n_rich_outliers=0, n_poor_outliers=0, \n",
     "                 country='United States of America'):\n",
     "        '''\n",
@@ -210,10 +222,9 @@
     "        # focus on people who have given ...\n",
     "        key_x  = 'YearsCodePro'\n",
     "        key_y  = 'ConvertedCompYearly'\n",
-    "        df   = dataset.dropna(subset=[key_x, key_y])\n",
     "        self.key_x = key_x\n",
     "        self.key_y = key_y\n",
-    "    \n",
+    "\n",
     "        qualifiers = {\n",
     "            'MainBranch': 'I am a developer by profession',\n",
     "       }\n",
@@ -231,7 +242,11 @@
     "            devs = get_c_devs(df, lang=language)\n",
     "        else:\n",
     "            devs = get_lang_devs(df, language)\n",
-    "        \n",
+    "\n",
+    "        self.df_no_x = devs[devs[key_x].isnull()]\n",
+    "        self.df_no_y = devs[devs[key_y].isnull()]\n",
+    "        devs  = devs.dropna(subset=[key_x, key_y])\n",
+    "\n",
     "        replacement_dict = {\n",
     "            'Less than 1 year': '0.5',\n",
     "            'More than 50 years': '51',\n",
@@ -402,6 +417,21 @@
     "        filename = base_filename % (self.language, self.country)\n",
     "        plt.savefig(filename.replace(' ', '-'), bbox_inches='tight')\n",
     "\n",
+    "    def probe_excluded_rows(self):\n",
+    "        '''\n",
+    "        Print missing-value counts; plot ages of rows lacking key_y vs. kept rows.\n",
+    "        '''\n",
+    "        no_x, no_y = self.df_no_x, self.df_no_y\n",
+    "        print(no_x.shape[0], 'did not specify', self.key_x)\n",
+    "        print(no_y.shape[0], 'did not specify', self.key_y)\n",
+    "        # rows lacking both keys are in both frames; count them only once\n",
+    "        dropped = no_x.shape[0] + int(no_y[self.key_x].notnull().sum())\n",
+    "        print('total developers:', self.devs.shape[0] + dropped)\n",
+    "        title1 = 'Age of %s Programmers excluded from analysis'\n",
+    "        visualize_devs(self.df_no_y, title1 % self.language)\n",
+    "        title2 = 'Age of %s programmers included in analysis'\n",
+    "        visualize_devs(self.devs, title2 % self.language)\n",
+    "    \n",
     "def show_model_stats(coef, intercept, y_test, y_pred, label):\n",
     "    '''\n",
     "    Displays model performance.\n",
@@ -474,7 +504,8 @@
     "#python.run_regression(x_transform=log_base_a, change_base=1.20, risky=2, random=555, \n",
     "#                      color='pink', name='Risky regression line')\n",
     "python.run_log_regression(nodraw=False)\n",
-    "python.export_image()"
+    "python.export_image()\n",
+    "python.probe_excluded_rows()"
    ]
   },
   {
@@ -530,7 +561,8 @@
     "c.run_regression(x_transform=log_base_a, change_base=1.3, \n",
     "                 x_shift=2, y_shift=-5000, color='magenta', random=555)\n",
     "c.run_log_regression(nodraw=False)\n",
-    "c.export_image()"
+    "c.export_image()\n",
+    "c.probe_excluded_rows()"
    ]
   },
   {