From c6096cfe6c3498d4f1af7fe26e77cadb98237899 Mon Sep 17 00:00:00 2001
From: scuti <scuti@tutamail.com>
Date: Sat, 19 Apr 2025 20:03:19 -0700
Subject: [PATCH] Added new scatter plot.

Plots the difference between usage of and desire to use a language (y-axis)
against the usage of that language (x-axis).
---
 stackoverflow-survey.ipynb | 48 +++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb
index 3e7f5cb..b9d3d4d 100644
--- a/stackoverflow-survey.ipynb
+++ b/stackoverflow-survey.ipynb
@@ -169,6 +169,46 @@
     "visualize_diff(hype, color=\"red\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6b1a935-eeda-416f-8adf-5e854d3aa066",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# do people fall out of love with langs\n",
+    "# the more they are used professionally?\n",
+    "\n",
+    "def visualize_favor(df, key_x, key_y, MAGIC_X=0, MAGIC_Y=0, title=str(), saveto=None):\n",
+    "    \"\"\"Scatter df[key_x] against df[key_y], one hue per df['Language'] value.\n",
+    "\n",
+    "    Points with x > MAGIC_X or y > MAGIC_Y also get a text label; the figure\n",
+    "    is written to `saveto` when it is not None.\n",
+    "    \"\"\"\n",
+    "    plt.figure()\n",
+    "    OFFSET = 1 # push text away from point slightly\n",
+    "    # label points that aren't in a cluster (those past either threshold)\n",
+    "    for x, y, lang in zip(df[key_x], df[key_y], df[\"Language\"]):\n",
+    "        if x > MAGIC_X or y > MAGIC_Y:\n",
+    "            plt.text(x+OFFSET, y+OFFSET, lang, ha=\"left\", size='medium')\n",
+    "\n",
+    "    sb.scatterplot(data=df, x=key_x, y=key_y, hue=\"Language\")\n",
+    "    plt.legend(loc='lower left', bbox_to_anchor=(0, -1.25), ncol=3) \n",
+    "    plt.title(title)\n",
+    "    if saveto is not None:\n",
+    "        plt.savefig(saveto, bbox_inches='tight')\n",
+    "# column names used for the x and y axes of the scatter plot\n",
+    "key_x, key_y = \"Have Worked With\", \"Want To Use and Have Used Difference\"\n",
+    "used   = pd.DataFrame(l1.items(), columns=['Language', key_x])\n",
+    "wanted = pd.DataFrame(motiv_diff.items(), columns=['Language', key_y])\n",
+    "# left join: keep every language from l1, even without a motiv_diff entry\n",
+    "merged = used.merge(wanted[[\"Language\", key_y]], on='Language', how='left')\n",
+    "visualize_favor(merged, key_x, key_y,\n",
+    "                saveto=\"images/favor.png\",\n",
+    "                MAGIC_X=5000, MAGIC_Y=2000)\n",
+    "del used, wanted, merged"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -223,14 +263,6 @@
     "# earn less than the mean compensation\n",
     "# (what titles have high standard deviations in earnings)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f6b1a935-eeda-416f-8adf-5e854d3aa066",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {