From be0ee359d400b8ad4eff95b338953684d3ef96f7 Mon Sep 17 00:00:00 2001
From: - <->
Date: Fri, 18 Apr 2025 07:00:13 -0700
Subject: [PATCH] Initial commit. Exploring the popularity of programming languages

---
 stackoverflow-survey.ipynb | 172 +++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 stackoverflow-survey.ipynb

diff --git a/stackoverflow-survey.ipynb b/stackoverflow-survey.ipynb
new file mode 100644
index 0000000..81f925e
--- /dev/null
+++ b/stackoverflow-survey.ipynb
@@ -0,0 +1,172 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74e4cf25-6649-4633-89ea-03ffc2e23caa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import Counter\n",
+    "\n",
+    "import pandas as pd\n",
+    "import seaborn as sb\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# avoid burning my eyes @ night\n",
+    "plt.style.use(\"dark_background\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2b80545-2481-4ee8-8d43-ffd4a612a397",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "FILE = \"data/survey_results_public.csv\"\n",
+    "so_df = pd.read_csv(FILE)\n",
+    "\n",
+    "print(so_df.keys())\n",
+    "so_df.describe()\n",
+    "\n",
+    "# print(so_df[:3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5070e38-8b93-4dc2-9ddb-9a06283ef8d9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# get popularity of different programming languages\n",
+    "\n",
+    "# keys re: languages are:\n",
+    "# LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,LanguageDesired\n",
+    "\n",
+    "# draw horizontal bar plot\n",
+    "# https://seaborn.pydata.org/examples/part_whole_bars.html\n",
+    "\n",
+    "# draw as strip chart\n",
+    "# https://seaborn.pydata.org/generated/seaborn.stripplot.html#seaborn.stripplot\n",
+    "\n",
+    "def get_langs(dataset, key=\"LanguageHaveWorkedWith\"):\n",
+    "    \"\"\"Count the languages in a ';'-separated survey column.\n",
+    "\n",
+    "    Non-string cells (presumably NaN for skipped questions) are ignored.\n",
+    "    Returns a dict of language -> count, most popular first.\n",
+    "    Raises KeyError if `key` is not a column of `dataset`.\n",
+    "    \"\"\"\n",
+    "    # raise instead of assert: asserts are stripped under `python -O`\n",
+    "    if key not in dataset.keys():\n",
+    "        raise KeyError(f\"column {key!r} not found in dataset\")\n",
+    "    lang_count = Counter()\n",
+    "    for response in dataset[key]:\n",
+    "        if isinstance(response, str):\n",
+    "            lang_count.update(response.split(';'))\n",
+    "    # most_common() sorts by count, descending; ties keep first-seen order\n",
+    "    return dict(lang_count.most_common())\n",
+    "\n",
+    "def visualize_langs(langs, langs2, label1=\"condition1\", label2=\"condition2\"):\n",
+    "    \"\"\"Overlay two language -> count dicts as strip plots on one figure.\n",
+    "\n",
+    "    `langs` is drawn in light blue and `langs2` in red; `label1` and\n",
+    "    `label2` name them in the legend and in the title.\n",
+    "    \"\"\"\n",
+    "    DOT_COLOR1 = \"lightblue\"\n",
+    "    DOT_COLOR2 = \"red\"\n",
+    "    BG_COLOR = \"black\"\n",
+    "    df = pd.DataFrame(langs.items(), columns=['Languages', 'Count'])\n",
+    "    df2 = pd.DataFrame(langs2.items(), columns=['Languages', 'Count'])\n",
+    "\n",
+    "    plt.figure(figsize=(10, 15))\n",
+    "\n",
+    "    # label each series with the caller's labels, not hard-coded text\n",
+    "    sb.stripplot(x='Count', y='Languages', data=df,\n",
+    "                 size=5, color=DOT_COLOR1, label=label1, jitter=True)\n",
+    "    sb.stripplot(x='Count', y='Languages', data=df2,\n",
+    "                 size=5, color=DOT_COLOR2, label=label2, jitter=True)\n",
+    "\n",
+    "    # custom legend handles, one per series, to avoid duplicate entries;\n",
+    "    # the line color matches the dark background so that no visible line\n",
+    "    # bisects the marker\n",
+    "    handles = [\n",
+    "        plt.Line2D([0], [0], marker='o', color=BG_COLOR, label=label,\n",
+    "                   markerfacecolor=dot_color, markersize=10)\n",
+    "        for label, dot_color in ((label1, DOT_COLOR1), (label2, DOT_COLOR2))\n",
+    "    ]\n",
+    "    plt.legend(handles=handles, loc=\"center right\")\n",
+    "\n",
+    "    plt.grid(axis='x', linestyle='--', alpha=0.75)\n",
+    "    plt.title(f\"{label1} vs {label2}\")\n",
+    "\n",
+    "l1 = get_langs(so_df)\n",
+    "l2 = get_langs(so_df, \"LanguageWantToWorkWith\")\n",
+    "visualize_langs(l1, l2, label1=\"have worked with\", label2=\"want to work with\")\n",
+    "\n",
+    "l3 = get_langs(so_df, \"LanguageAdmired\")\n",
+    "l4 = l2  # same column as l2, so reuse its counts\n",
+    "visualize_langs(l3, l4, label1=\"admired\", label2=\"want to work with\")\n",
+    "\n",
+    "# determine extrinsic vs intrinsic motivation\n",
+    "def get_difference(dict1, dict2):\n",
+    "    \"\"\"Return {key: dict1[key] - dict2[key]} for every key of dict1.\n",
+    "\n",
+    "    Keys missing from dict2 count as 0 instead of raising KeyError.\n",
+    "    \"\"\"\n",
+    "    return {key: count - dict2.get(key, 0) for key, count in dict1.items()}\n",
+    "\n",
+    "motiv_diff = get_difference(l2, l1)\n",
+    "print(motiv_diff)\n",
+    "\n",
+    "# determine level of hype\n",
+    "hype = get_difference(l3, l4)\n",
+    "print(hype)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e90cf119-c50d-468a-bc87-72dac41176ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# print survey ans\n",
+    "employment_status = Counter(so_df[\"MainBranch\"])\n",
+    "print(employment_status)\n",
+    "\n",
+    "print(so_df[\"ConvertedCompYearly\"][:3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6b1a935-eeda-416f-8adf-5e854d3aa066",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}