From c6096cfe6c3498d4f1af7fe26e77cadb98237899 Mon Sep 17 00:00:00 2001
From: scuti
Date: Sat, 19 Apr 2025 20:03:19 -0700
Subject: [PATCH] Added new scatter plot.

Shows the difference between usage and desire to use a lang (=y)
over the usage of language (=x).
---
 stackoverflow-survey.ipynb | 48 +++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb
index 3e7f5cb..b9d3d4d 100644
--- a/stackoverflow-survey.ipynb
+++ b/stackoverflow-survey.ipynb
@@ -169,6 +169,46 @@
     "visualize_diff(hype, color=\"red\")"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6b1a935-eeda-416f-8adf-5e854d3aa066",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# do people fall out of love with langs\n",
+    "# the more they are used professionally?\n",
+    "\n",
+    "def visualize_favor(df, key_x, key_y, MAGIC_X=0, MAGIC_Y=0, title=str(), saveto=None):\n",
+    "    \"\"\"Scatter df[key_y] over df[key_x], colored by \"Language\".\n",
+    "\n",
+    "    Rows with key_x > MAGIC_X or key_y > MAGIC_Y get a text label;\n",
+    "    the figure is saved to `saveto` when it is not None.\n",
+    "    \"\"\"\n",
+    "    plt.figure()\n",
+    "    OFFSET = 1 # push text away from point slightly\n",
+    "    # label points that aren't in a cluster\n",
+    "    outliers = df[(df[key_x] > MAGIC_X) | (df[key_y] > MAGIC_Y)]\n",
+    "    for x, y, lang in zip(outliers[key_x], outliers[key_y], outliers[\"Language\"]):\n",
+    "        plt.text(x+OFFSET, y+OFFSET, lang, ha=\"left\", size='medium')\n",
+    "\n",
+    "    sb.scatterplot(data=df, x=key_x, y=key_y, hue=\"Language\")\n",
+    "    plt.legend(loc='lower left', bbox_to_anchor=(0, -1.25), ncol=3) \n",
+    "    plt.title(title)\n",
+    "    if saveto is not None:\n",
+    "        plt.savefig(saveto, bbox_inches='tight')\n",
+    "key_x = \"Have Worked With\"\n",
+    "key_y = \"Want To Use and Have Used Difference\"\n",
+    "df1 = pd.DataFrame(l1.items(), columns=['Language', key_x])\n",
+    "df2 = pd.DataFrame(motiv_diff.items(), columns=['Language', key_y])\n",
+    "# chatgpt tells me how to combine df\n",
+    "merged = pd.merge(df1, df2[[\"Language\", key_y]], on='Language', how='left')\n",
+    "visualize_favor(merged, key_x, key_y, \n",
+    "                MAGIC_X=5000, MAGIC_Y=2000, \n",
+    "                saveto=\"images/favor.png\")\n",
+    "del df1, df2, merged"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -223,14 +263,6 @@
     "# earn less than the mean compensation\n",
     "# (what titles have high standard deviations in earnings)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f6b1a935-eeda-416f-8adf-5e854d3aa066",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {