Added chart for years of experience and earnings.
Developers can be selected by programming language, and dots can be colored by country or employment status.
This commit is contained in:
@@ -278,7 +278,180 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "acd193c3-eb73-498c-a8d4-c59c0eb5dcdb",
|
||||
"id": "cdf21b1c-1316-422f-ad14-48150f80366c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
# Summary statistics of yearly compensation for one group of respondents.
#
# Relies on names bound by earlier cells: `pd`, `df_no_na` and `so_df`.

# Alternative filter, kept for quick switching:
# key = "DevType"
# prof = "Developer, full-stack"

key = "MainBranch"
prof = "I am a developer by profession"
col = "ConvertedCompYearly"

devs = df_no_na[df_no_na[key] == prof]

# Kernel-wide display option: floats are rendered with two decimals from here on.
pd.set_option('display.float_format', '{:.2f}'.format)

# A notebook renders only the LAST expression of a cell. This summary sits in
# the middle of the cell, so it must be printed explicitly or it is discarded.
print(devs.describe()[col])

# Ad-hoc follow-up queries: uncomment one and move it to the end of the cell.

# who reports earning $1/yr
# devs[devs[col] == 1.0]

# who are the millionaires
# devs[devs[col] > 1e6]

# who make more than the mean (the original hard-coded 76230.84 here)
# devs[devs[col] > devs[col].mean()]

# who make more than the median (the original hard-coded 63316.00 here)
# devs[devs[col] > devs[col].median()]

# the ancient ones: respondents reporting more than 50 years of professional coding
so_df[so_df["YearsCodePro"] == 'More than 50 years']
# TODO: drop the 18-24 year old respondent in this result; that answer cannot be genuine.
# Author's notes on the remaining rows:
# 55-64 years old
# 65 years or older
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e9b0c49-eac6-45e1-83f1-92813e734ef5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# draw count plot of developers based on age\n",
|
||||
"\n",
|
def visualize_devs(df, lang, key="Age"):
    """Draw and save a count plot of one column for developers of a language.

    Args:
        df: DataFrame of respondents; must contain the column named by `key`.
        lang: Language name, used in the plot title and the output file name.
        key: Column whose values are counted. Defaults to "Age".

    Side effects:
        Opens a new matplotlib figure and saves it as
        images/<key>-of-<lang>-programmers.png. The images/ directory is
        created when missing, and any "/" in `lang` is replaced by "-" in
        the file name.
    """
    import os  # local import: the notebook's import cell is outside this block

    plt.figure()
    plt.xticks(rotation=45)

    if key == "Age":
        # Bracket labels as listed by df[key].unique(), arranged youngest to
        # oldest so the bars read left to right.
        order = [
            'Under 18 years old', '18-24 years old',
            '25-34 years old', '35-44 years old',
            '45-54 years old', '55-64 years old',
            '65 years or older', 'Prefer not to say',
        ]
        title = "Ages of %s Programmers" % lang
    else:
        # The age brackets do not apply to other columns; let seaborn pick
        # the category order and name the column in the title instead.
        order = None
        title = "%s of %s Programmers" % (key, lang)

    sb.countplot(x=key, data=df, order=order)
    plt.title(title)

    # A "/" in the language name (e.g. "HTML/CSS") would otherwise be read as
    # a directory separator and point into a folder that does not exist.
    safe_lang = lang.replace("/", "-")
    os.makedirs("images", exist_ok=True)
    filename = "images/%s-of-%s-programmers.png" % (key, safe_lang)
    plt.savefig(filename, bbox_inches="tight")
|
||||
"\n",
|
def get_lang_devs(df, lang):
    """Return the rows of `df` whose language list contains `lang` exactly.

    The "LanguageHaveWorkedWith" column holds ';'-separated language names.
    A row is kept only when one whole entry equals `lang` (case-sensitive),
    so "Java" does not select "JavaScript" rows and single-character names
    such as "C" or "R" work as well.

    Args:
        df: DataFrame with a "LanguageHaveWorkedWith" column.
        lang: Language name to look for; treated as literal text.

    Returns:
        The matching rows of `df`. Rows with a missing language list are
        excluded.
    """
    import re  # local import: the notebook's import cell is outside this block

    col = "LanguageHaveWorkedWith"
    # Anchor the name between ';' separators (or the string ends) so only a
    # whole entry matches. re.escape keeps names like "C++" from being read
    # as regex syntax. Non-capturing groups avoid pandas' match-group warning.
    pattern = r"(?:^|;)" + re.escape(lang) + r"(?:;|$)"
    return df[df[col].str.contains(pattern, na=False, regex=True)]
|
||||
"\n",
|
def get_c_devs(df, lang="C"):
    """Return the rows of `df` whose language list has an entry equal to `lang`.

    Each "LanguageHaveWorkedWith" value is split on ';' and `lang` must equal
    one of the resulting entries, so "C" does not match "C++" or "C#".

    Args:
        df: DataFrame with a "LanguageHaveWorkedWith" column.
        lang: Language name to look for. Defaults to "C".

    Returns:
        A new DataFrame holding the matching rows, with the index, columns
        and dtypes of `df`. When nothing matches, the result is empty but
        still has all columns.
    """
    key = "LanguageHaveWorkedWith"

    def uses_lang(entry):
        # Non-string entries (e.g. a missing answer) cannot be split and
        # never match.
        return isinstance(entry, str) and lang in entry.split(";")

    # One boolean per row; astype(bool) keeps the mask valid for an empty frame.
    mask = df[key].map(uses_lang).astype(bool)
    # Copy so callers can add or overwrite columns without touching `df`.
    return df[mask].copy()
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "11a1b9fb-db48-4749-8d77-4241a99d7bad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
# Age count plot for C developers; C is selected through get_c_devs.
visualize_devs(get_c_devs(so_df), "C")

# Age count plots for the remaining languages, selected through get_lang_devs.
for lang in ("Cobol", "Prolog", "Ada", "Python"):
    foo = get_lang_devs(so_df, lang)
    visualize_devs(foo, lang)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67b56700-5847-4af8-87ec-74249aa95749",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
# Scatter plot of yearly compensation against years of professional experience
# for the developers of one language.
#
# Relies on names bound by earlier cells: `so_df`, `pd`, `plt`, `sb`,
# `get_lang_devs` and `get_c_devs`.

# focus on people who have given both a salary and their years of experience
key = "ConvertedCompYearly"
key2 = "YearsCodePro"
df = so_df.dropna(subset=[key, key2])

# column -> required value; every entry narrows df further
criteria = {"MainBranch": "I am a developer by profession"}

# print(df["Country"].unique())

# criteria["Country"] = "United States of America"
for k in criteria:
    df = df[df[k] == criteria[k]]

jobs = None
# expected C jobs
#jobs = ["Developer, embedded applications or devices",
#        "Developer, game or graphics",
#        "Engineering manager" ,
#        "Project manager",
#        "Product manager"
#]

# expected python jobs
#jobs = ["Data scientist or machine learning specialist",
#        "Data or business analyst",
#        "Data engineer",
#        "DevOps specialist",
#        "Developer, QA or test"
#]

# Filtering with multiple strings: a row is kept when any of its columns
# holds exactly one of the listed job titles.
if jobs:
    df = df[df.isin(jobs).any(axis=1)]

# play with these
language = "Cobol"
legend = True
NUM_OF_TOO_RICH = 3   # number of top earners removed before plotting
# "Employment"
hue = "Country"

# Single-character names go through get_c_devs, longer ones through get_lang_devs.
if len(language) > 1:
    devs = get_lang_devs(df, language)
else:
    devs = get_c_devs(df, lang=language)

# The two textual answers are mapped to numbers so the column can be numeric.
replacement_dict = {
    'Less than 1 year': '0.5',
    'More than 50 years': '51',
}

# https://stackoverflow.com/questions/47443134/update-column-in-pandas-dataframe-without-warning
# Kernel-wide setting, kept because other cells may rely on it. This cell no
# longer needs it: the column is replaced through assign(), which builds a new
# frame instead of writing into a filtered one.
pd.options.mode.chained_assignment = None  # default='warn'
new_column = devs[key2].replace(replacement_dict)
# errors='coerce' turns any remaining non-numeric answer into NaN
devs = devs.assign(**{key2: pd.to_numeric(new_column, errors='coerce')})
# print( devs[key2].unique() )

# Remove the highest earners.
indices = devs[key].nlargest(NUM_OF_TOO_RICH).index
devs = devs.drop(indices)
print(len(devs))

plt.figure()
plt.xticks(rotation=90)
sb.scatterplot(data=devs, x=key2, y=key, hue=hue, legend=legend)
# Only reposition the legend when one was requested; with legend = False
# there are no labelled artists for plt.legend() to show.
if legend:
    plt.legend(loc='lower center', bbox_to_anchor=(1.5, 0))
plt.title("Annual Salary of %s Developers Over Years of Experience" % language)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b954a811-e401-48dc-9ba4-263a5f2cf5c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
Reference in New Issue
Block a user