diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb
index 81f925e..d2ce02c 100644
--- a/stackoverflow-survey.ipynb
+++ b/stackoverflow-survey.ipynb
@@ -36,7 +36,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e5070e38-8b93-4dc2-9ddb-9a06283ef8d9",
+   "id": "35b9727a-176c-4193-a1f9-a508aecd2d1c",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -45,9 +45,6 @@
     "#keys re: languages are:\n",
     "#LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,LanguageDesired\n",
     "\n",
-    "# draw horizontal bar plot\n",
-    "# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
-    "\n",
     "# draw as strip chart\n",
     "# https://seaborn.pydata.org/generated/seaborn.stripplot.html#seaborn.stripplot\n",
     "\n",
@@ -100,21 +97,62 @@
     "l3 = get_langs( so_df, \"LanguageAdmired\")\n",
     "l4 = get_langs( so_df, \"LanguageWantToWorkWith\")\n",
     "visualize_langs(l3, l4, label1=\"admired\", label2=\"want to work with\")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d0bfdb92-378a-4452-91cc-4d21afd2d6cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# draw horizontal bar plot\n",
+    "# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
     "\n",
-    "# determine extrinsic vs intrinsic motivation\n",
+    "# investigate extrinsic vs intrinsic motivation\n",
     "def get_difference(dict1, dict2):\n",
     "    keys = dict1.keys()\n",
     "    result = dict()\n",
     "    for key in keys:\n",
     "        result[key] = dict1[key] - dict2[key]\n",
     "    return result\n",
+    "\n",
+    "def visualize_diff(diff_dict, color=\"lightblue\"):\n",
+    "    \"\"\"Draw a horizontal bar plot of per-language differences, largest first.\n",
+    "\n",
+    "    diff_dict maps a language name to a numeric difference (as produced by\n",
+    "    get_difference); color is the bar color passed to seaborn.\n",
+    "    Each bar is labelled with its delta, and a dashed red line separates the\n",
+    "    positive bars from the rest when both kinds are present.\n",
+    "    \"\"\"\n",
+    "    KEY = \"Value\"\n",
+    "    DELTA = '\\u0394'\n",
+    "    diff_sorted = sorted(diff_dict.items(), key=lambda item: item[1], reverse=True)\n",
+    "    df = pd.DataFrame(diff_sorted, columns=['Languages', KEY])\n",
+    "    plt.figure(figsize=(15, 20))\n",
+    "    sb.barplot(x=KEY, y='Languages', data=df, color=color)\n",
+    "\n",
+    "    # label each bar at its tip: to the right of positive bars, to the left of negative ones\n",
+    "    for index, value in enumerate(df[KEY]):\n",
+    "        ha = 'left' if value >= 0 else 'right'\n",
+    "        plt.text(value, index, DELTA + str(value), va='center', ha=ha)\n",
+    "\n",
+    "    # rows are sorted descending, so the positive bars are rows 0..n_positive-1\n",
+    "    # and the boundary sits half a row below the last of them; using the count\n",
+    "    # (not the first index of the smallest positive value) keeps it right on ties\n",
+    "    n_positive = int((df[KEY] > 0).sum())\n",
+    "    if 0 < n_positive < len(df):\n",
+    "        plt.axhline(y=n_positive - 0.5, color='red', linestyle='--')\n",
     "    \n",
     "motiv_diff = get_difference(l2, l1)\n",
-    "print(motiv_diff)\n",
+    "# print(motiv_diff)\n",
+    "visualize_diff(motiv_diff)\n",
     "\n",
     "# determine level of hype\n",
-    "hype = get_difference(l3, l4)\n",
-    "print(hype)\n"
+    "hype = get_difference(l4, l3)\n",
+    "# print(hype)\n",
+    "visualize_diff(hype, color=\"red\")"
    ]
   },
   {
@@ -128,7 +166,7 @@
     "employment_status = Counter(so_df[\"MainBranch\"])\n",
     "print(employment_status)\n",
     "\n",
-    "print(so_df[\"ConvertedCompYearly\"][:3])"
+    "print(so_df[\"ConvertedCompYearly\"])"
    ]
   },
   {