diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb index b9d3d4d..83869f6 100644 --- a/stackoverflow-survey.ipynb +++ b/stackoverflow-survey.ipynb @@ -122,11 +122,14 @@ "# https://seaborn.pydata.org/examples/part_whole_bars.html\n", "\n", "# investigate extrinsic vs intrinsic motivation\n", - "def get_difference(dict1, dict2):\n", + "def get_difference(dict1, dict2, proportion=False):\n", " keys = dict1.keys()\n", " result = dict()\n", " for key in keys:\n", - " result[key] = dict1[key] - dict2[key]\n", + " if proportion:\n", + " result[key] = round((dict1[key] - dict2[key])/dict2[key],2)\n", + " else:\n", + " result[key] = dict1[key] - dict2[key]\n", " return result\n", "\n", "def visualize_diff(diff_dict, color=\"lightblue\", saveto=None):\n", @@ -137,7 +140,6 @@ " df = pd.DataFrame(diff_sorted.items(), columns=['Languages', 'Value'])\n", " plt.figure(figsize=(15,20)) \n", " sb.barplot(x=KEY, y='Languages', data=df, color=color)\n", - " \n", " DELTA = '\\u0394'\n", " for index, value in enumerate(df[KEY]):\n", " # chatgpt annotates my chart\n", @@ -149,24 +151,33 @@ " # Adjust the x position for negative values\n", " plt.text(value, index, DELTA+str(value), va='center', ha='right') \n", " lowest = 0\n", - " offset = 0.5\n", + " offset = 0\n", " positive_values = df[df[KEY] > 0][KEY]\n", " if not positive_values.empty:\n", " lowest = positive_values.min()\n", + " offset = list(positive_values).count(lowest) \n", " if len(positive_values) < len(df):\n", - " # don't draw the line if every value is greater than 0\n", - " plt.axhline(y=df[KEY].tolist().index(lowest) + offset, color='red', linestyle='--')\n", + " # don't draw the line if every value is greater than 0_\n", + " plt.axhline(y=df[KEY].tolist().index(lowest) + (offset-0.5), \n", + " color='red', linestyle='--', zorder=-1)\n", " if saveto is not None:\n", " plt.savefig(saveto, bbox_inches='tight')\n", " \n", - "motiv_diff = get_difference(l2, l1)\n", + "motiv_diff = get_difference(l2, l1, proportion=True)\n", "# print(motiv_diff)\n", "visualize_diff(motiv_diff, saveto=\"images/delta.png\")\n", + "motiv_diff = get_difference(l2, l1)\n", + "visualize_diff(motiv_diff, saveto=\"images/delta-b.png\")\n", + "\n", + "# no clear description of what \"admired\" is\n", + "# in the schema\n", + "# but generally people want to use the languages\n", + "# they admire\n", "\n", "# determine level of hype\n", - "hype = get_difference(l4, l3)\n", + "# hype = get_difference(l4, l3)\n", "# print(hype)\n", - "visualize_diff(hype, color=\"red\")" + "# visualize_diff(hype, color=\"red\")" ] }, { @@ -263,6 +274,14 @@ "# earn less than the mean compensation\n", "# (what titles have high standard deviations in earnings)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acd193c3-eb73-498c-a8d4-c59c0eb5dcdb", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {