{
"cells": [
{
"cell_type": "markdown",
"id": "57849f4a-517b-4906-a04f-def9a0e2c6cc",
"metadata": {},
"source": [
"# Tutorial"
]
},
{
"cell_type": "markdown",
"id": "237cb1a9-eaeb-4c79-84bc-2100de1f5b4e",
"metadata": {},
"source": [
"[![Open in Colab][colab_badge]][colab_link] [![Binder][binder_badge]][binder_link]\n",
"\n",
"[colab_badge]: https://colab.research.google.com/assets/colab-badge.svg\n",
"[colab_link]: https://colab.research.google.com/github/dustalov/evalica/blob/master/docs/tutorial.ipynb\n",
"[binder_badge]: https://mybinder.org/badge_logo.svg\n",
"[binder_link]: https://mybinder.org/v2/gh/dustalov/evalica/HEAD?labpath=docs/tutorial.ipynb"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b612593d-8be8-401c-b92e-f5253e50027f",
"metadata": {},
"outputs": [],
"source": [
"import evalica\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"from evalica import Winner, alpha_bootstrap, bootstrap, bradley_terry\n",
"\n",
"%config InlineBackend.figure_formats = ['svg']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfed1bc3-7b95-4af6-b234-f94240706e24",
"metadata": {},
"outputs": [],
"source": [
"evalica.__version__"
]
},
{
"cell_type": "markdown",
"id": "alpha-section",
"metadata": {},
"source": [
"## Pairwise Comparisons"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e647667c-adb8-4f16-9b7c-993cf2739c2b",
"metadata": {},
"outputs": [],
"source": [
"# The URL embeds a commit hash, so the downloaded file is pinned to one revision.\n",
"food_url = \"https://raw.githubusercontent.com/dustalov/evalica/0893fd0f1e8107b2d62fd6c5816b55b417c1a050/food.csv\"\n",
"\n",
"# Textual outcome labels found in the CSV and the Winner member each one maps to.\n",
"winner_by_label = {\"left\": Winner.X, \"right\": Winner.Y, \"tie\": Winner.Draw}\n",
"\n",
"# Read every column as a string, then swap the outcome labels for Winner members.\n",
"df_food = pd.read_csv(food_url, dtype=str)\n",
"df_food[\"winner\"] = df_food[\"winner\"].map(winner_by_label)\n",
"\n",
"df_food.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b96c16f2-b364-4974-aa41-085564d40681",
"metadata": {},
"outputs": [],
"source": [
"# evalica.indexing returns three values: the first two are stored as id columns\n",
"# on df_food, and the third is kept as `index` for the cells below.\n",
"left_ids, right_ids, index = evalica.indexing(df_food[\"left\"], df_food[\"right\"])\n",
"df_food[\"left_id\"] = left_ids\n",
"df_food[\"right_id\"] = right_ids"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec6ea649-04a5-4abb-961e-63afa4dfbc77",
"metadata": {},
"outputs": [],
"source": [
"# Build the matrices object from the two id columns, the outcomes, and `index`.\n",
"matrices = evalica.matrices(\n",
"    df_food[\"left_id\"],\n",
"    df_food[\"right_id\"],\n",
"    df_food[\"winner\"],\n",
"    index,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42da86e0-c2c6-42c4-9c5e-84d56587fdea",
"metadata": {},
"outputs": [],
"source": [
"# Win matrix, labelled on both axes with `index`.\n",
"pd.DataFrame(data=matrices.win_matrix, index=index, columns=index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5dc51f6-bca7-4153-8265-18c019e8a639",
"metadata": {},
"outputs": [],
"source": [
"# Tie matrix, labelled on both axes with `index`.\n",
"pd.DataFrame(data=matrices.tie_matrix, index=index, columns=index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a75c4442-934a-47f1-8e19-18b73e436fcb",
"metadata": {},
"outputs": [],
"source": [
"# Run evalica.counting on the left/right/winner columns and display its scores as a frame.\n",
"count_result = evalica.counting(\n",
"    df_food[\"left\"],\n",
"    df_food[\"right\"],\n",
"    df_food[\"winner\"],\n",
")\n",
"\n",
"count_result.scores.to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce104025-26a4-4a8e-b2dd-1e97ede5592f",
"metadata": {},
"outputs": [],
"source": [
"# Run evalica.average_win_rate on the left/right/winner columns and display its scores as a frame.\n",
"avr_result = evalica.average_win_rate(\n",
"    df_food[\"left\"],\n",
"    df_food[\"right\"],\n",
"    df_food[\"winner\"],\n",
")\n",
"\n",
"avr_result.scores.to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d561c08e-4a52-435e-a847-b6adf44ab543",
"metadata": {},
"outputs": [],
"source": [
"# Run evalica.bradley_terry on the left/right/winner columns and display its scores as a frame.\n",
"bt_result = evalica.bradley_terry(\n",
"    df_food[\"left\"],\n",
"    df_food[\"right\"],\n",
"    df_food[\"winner\"],\n",
")\n",
"\n",
"bt_result.scores.to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64bf8f2b-a08f-46b9-b4df-e5d58d8c8a50",
"metadata": {},
"outputs": [],
"source": [
"# Run evalica.newman on the left/right/winner columns and display its scores as a frame.\n",
"newman_result = evalica.newman(\n",
"    df_food[\"left\"],\n",
"    df_food[\"right\"],\n",
"    df_food[\"winner\"],\n",
")\n",
"\n",
"newman_result.scores.to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35cdcc91-d182-4d1c-842e-251052defcd4",
"metadata": {},
"outputs": [],
"source": [
"# Run evalica.eigen on the left/right/winner columns and display its scores as a frame.\n",
"eigen_result = evalica.eigen(\n",
"    df_food[\"left\"],\n",
"    df_food[\"right\"],\n",
"    df_food[\"winner\"],\n",
")\n",
"\n",
"eigen_result.scores.to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39c5d898-944c-472d-abc2-553c54503adf",
"metadata": {},
"outputs": [],
"source": [
"# Run evalica.elo on the left/right/winner columns and display its scores as a frame.\n",
"elo_result = evalica.elo(\n",
"    df_food[\"left\"],\n",
"    df_food[\"right\"],\n",
"    df_food[\"winner\"],\n",
")\n",
"\n",
"elo_result.scores.to_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66e1a1c5-b32d-4988-92b3-7549fbad3845",
"metadata": {},
"outputs": [],
"source": [
"# Pass the scores stored on `bt_result` to evalica.pairwise_frame and display the resulting frame.\n",
"bt_scores = bt_result.scores\n",
"df_bt_pairwise = evalica.pairwise_frame(bt_scores)\n",
"\n",
"df_bt_pairwise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e7dba72-29d8-4835-b290-2fbccd0d59de",
"metadata": {},
"outputs": [],
"source": [
"# Heatmap of the pairwise frame; each cell is annotated with its value to two decimals.\n",
"fig = px.imshow(df_bt_pairwise, color_continuous_scale=\"RdBu\", text_auto=\".2f\")\n",
"fig.update_layout(xaxis_title=\"Loser\", yaxis_title=\"Winner\", xaxis_side=\"top\")\n",
"\n",
"# Plotly hover text is HTML-like: fields are separated with <br>, and the empty\n",
"# <extra></extra> hides the secondary hover box that would otherwise show the trace name.\n",
"fig.update_traces(hovertemplate=\"Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}<extra></extra>\")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2310832c-3d5b-42e7-8032-728786abcddc",
"metadata": {},
"outputs": [],
"source": [
"# Call `bootstrap` with `bradley_terry` and the comparison columns,\n",
"# using 10 resamples and a fixed random_state.\n",
"bootstrap_result = bootstrap(\n",
"    bradley_terry, df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"],\n",
"    n_resamples=10, random_state=42,\n",
")\n",
"\n",
"# Reshape the distribution to long form with `Item` and `Score` columns.\n",
"score_distribution = bootstrap_result.distribution\n",
"df_melted = score_distribution.melt(var_name=\"Item\", value_name=\"Score\")\n",
"\n",
"df_melted.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ed986fb-c150-4d27-9c6f-a847adab48d9",
"metadata": {},
"outputs": [],
"source": [
"# One horizontal box per item, drawn from the melted bootstrap scores.\n",
"fig = px.box(df_melted, x=\"Score\", y=\"Item\", color=\"Item\", title=\"Bradley–Terry Bootstrap Scores\")\n",
"\n",
"# <br> is Plotly's line break in hover text; the empty <extra></extra> hides the\n",
"# secondary hover box, which would otherwise show the trace name.\n",
"fig.update_traces(hovertemplate=\"%{y}<br>Score: %{x:.3f}<extra></extra>\")\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "2284a8f6-06dc-4e8c-85ba-2dedd5835b9b",
"metadata": {},
"source": [
"## Inter-Rater Reliability"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "alpha-data",
"metadata": {},
"outputs": [],
"source": [
"# The URL embeds a commit hash, so the downloaded file is pinned to one revision.\n",
"codings_url = \"https://raw.githubusercontent.com/dustalov/evalica/d356c3988fdf9c1db249767413a7a8a1f49d64c0/codings.csv\"\n",
"\n",
"# The file is read without a header row and every value is kept as a string.\n",
"df_codings = pd.read_csv(codings_url, header=None, dtype=str)\n",
"\n",
"df_codings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "alpha-comparison",
"metadata": {},
"outputs": [],
"source": [
"distances = [\"nominal\", \"ordinal\", \"interval\", \"ratio\"]\n",
"\n",
"# Call evalica.alpha once per distance name and keep the `.alpha` value of each result.\n",
"alpha_values = {}\n",
"for dist in distances:\n",
"    alpha_values[dist] = evalica.alpha(df_codings, distance=dist).alpha  # type: ignore[arg-type]\n",
"\n",
"pd.Series(alpha_values, name=\"alpha\").to_frame()"
]
},
{
"cell_type": "markdown",
"id": "e49c0e48-d286-40b1-b6c8-3cfaf2d8920c",
"metadata": {},
"source": [
"### Confidence Intervals\n",
"\n",
"Evalica can also compute confidence intervals for Krippendorff's alpha using bootstrapping."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7115f1ba-dd54-4d6c-8fd9-861da955f24e",
"metadata": {},
"outputs": [],
"source": [
"# Call alpha_bootstrap on the codings with the nominal distance,\n",
"# 1000 resamples, a 0.95 confidence level, and a fixed random_state.\n",
"alpha_bootstrap_result = alpha_bootstrap(\n",
"    df_codings, distance=\"nominal\", n_resamples=1000, confidence_level=0.95, random_state=42,\n",
")\n",
"\n",
"# Histogram of the bootstrap distribution.\n",
"fig = px.histogram(\n",
"    alpha_bootstrap_result.distribution,\n",
"    nbins=50,\n",
"    title=\"Krippendorff's Alpha Bootstrap Distribution\",\n",
"    labels={\"value\": \"Alpha\", \"count\": \"Frequency\"},\n",
")\n",
"\n",
"# Vertical markers, added in this order: point estimate, lower bound, upper bound.\n",
"vertical_markers = [\n",
"    (alpha_bootstrap_result.alpha, \"dash\", \"red\", \"Point Estimate\"),\n",
"    (alpha_bootstrap_result.low, \"dot\", \"blue\", \"Lower Bound\"),\n",
"    (alpha_bootstrap_result.high, \"dot\", \"blue\", \"Upper Bound\"),\n",
"]\n",
"for position, dash_style, color, label in vertical_markers:\n",
"    fig.add_vline(x=position, line_dash=dash_style, line_color=color, annotation_text=label)\n",
"\n",
"fig.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}