Showed difference in admired and desired languages.

Visualizations for "have worked with" and "want to work with".
2025-04-18 19:07:54 -07:00
parent be0ee359d4
commit 59857f8d36
1 changed files with 49 additions and 9 deletions
--- a/stackoverflow-survey.ipynb
+++ b/stackoverflow-survey.ipynb
@@ -36,7 +36,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "e5070e38-8b93-4dc2-9ddb-9a06283ef8d9",
+   "id": "35b9727a-176c-4193-a1f9-a508aecd2d1c",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -45,9 +45,6 @@
    "#keys re: languages are:\n",
    "#LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,LanguageDesired\n",
    "\n",
-    "# draw horizontal bar plot\n",
-    "# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
-    "\n",
    "# draw as strip chart\n",
    "# https://seaborn.pydata.org/generated/seaborn.stripplot.html#seaborn.stripplot\n",
    "\n",
@@ -100,21 +97,64 @@
    "l3 = get_langs( so_df, \"LanguageAdmired\")\n",
    "l4 = get_langs( so_df, \"LanguageWantToWorkWith\")\n",
    "visualize_langs(l3, l4, label1=\"admired\", label2=\"want to work with\")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0bfdb92-378a-4452-91cc-4d21afd2d6cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# draw horizontal bar plot\n",
+    "# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
    "\n",
-    "# determine extrinsic vs intrinsic motivation\n",
+    "# investigate extrinsic vs intrinsic motivation\n",
    "def get_difference(dict1, dict2):\n",
    "    keys = dict1.keys()\n",
    "    result = dict()\n",
    "    for key in keys:\n",
    "        result[key] = dict1[key] - dict2[key]\n",
    "    return result\n",
+    "\n",
+    "def visualize_diff(diff_dict, color=\"lightblue\"):\n",
+    "    \"\"\"Draw a horizontal bar chart of the values in diff_dict, largest first.\n",
+    "\n",
+    "    diff_dict maps a label (shown on the 'Languages' axis) to a number;\n",
+    "    color is the bar color. Each bar is annotated with its value, and a\n",
+    "    dashed red line separates the positive bars from the rest when the\n",
+    "    chart contains both.\n",
+    "    \"\"\"\n",
+    "    KEY = \"Value\"\n",
+    "    ranked = sorted(diff_dict.items(), key=lambda item: item[1], reverse=True)\n",
+    "    df = pd.DataFrame(ranked, columns=['Languages', KEY])\n",
+    "    plt.figure(figsize=(15,20))\n",
+    "    sb.barplot(x=KEY, y='Languages', data=df, color=color)\n",
+    "\n",
+    "    DELTA = '\\u0394'\n",
+    "    for index, value in enumerate(df[KEY]):\n",
+    "        # Label each bar at its tip: text grows rightwards from non-negative\n",
+    "        # bars and leftwards from negative ones, so it never covers the bar.\n",
+    "        side = \"left\" if value >= 0 else \"right\"\n",
+    "        plt.text(value, index, DELTA+str(value), va='center', ha=side)\n",
+    "\n",
+    "    # Rows are sorted descending, so positive bars fill rows 0..n_positive-1\n",
+    "    # and the divider belongs half a row below the last of them. Counting\n",
+    "    # rows (instead of searching for the smallest positive value) stays right\n",
+    "    # when values repeat and cannot raise when no value is positive.\n",
+    "    n_positive = int((df[KEY] > 0).sum())\n",
+    "    if 0 < n_positive < len(df):\n",
+    "        plt.axhline(y=n_positive - 0.5, color='red', linestyle='--')\n",
    "    \n",
    "motiv_diff = get_difference(l2, l1)\n",
-    "print(motiv_diff)\n",
+    "# print(motiv_diff)\n",
+    "visualize_diff(motiv_diff)\n",
    "\n",
    "# determine level of hype\n",
-    "hype = get_difference(l3, l4)\n",
-    "print(hype)\n"
+    "hype = get_difference(l4, l3)\n",
+    "# print(hype)\n",
+    "visualize_diff(hype, color=\"red\")"
   ]
  },
  {
@@ -128,7 +168,7 @@
    "employment_status = Counter(so_df[\"MainBranch\"])\n",
    "print(employment_status)\n",
    "\n",
-    "print(so_df[\"ConvertedCompYearly\"][:3])"
+    "print(so_df[\"ConvertedCompYearly\"].head(3))"
   ]
  },
  {