{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "57849f4a-517b-4906-a04f-def9a0e2c6cc",
   "metadata": {},
   "source": [
    "# Tutorial"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "237cb1a9-eaeb-4c79-84bc-2100de1f5b4e",
   "metadata": {},
   "source": [
    "[![Open in Colab][colab_badge]][colab_link] [![Binder][binder_badge]][binder_link]\n",
    "\n",
    "[colab_badge]: https://colab.research.google.com/assets/colab-badge.svg\n",
    "[colab_link]: https://colab.research.google.com/github/dustalov/evalica/blob/master/docs/tutorial.ipynb\n",
    "[binder_badge]: https://mybinder.org/badge_logo.svg\n",
    "[binder_link]: https://mybinder.org/v2/gh/dustalov/evalica/HEAD?labpath=docs/tutorial.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b612593d-8be8-401c-b92e-f5253e50027f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import evalica\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "from evalica import Winner, alpha_bootstrap, bootstrap, bradley_terry\n",
    "\n",
    "%config InlineBackend.figure_formats = ['svg']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfed1bc3-7b95-4af6-b234-f94240706e24",
   "metadata": {},
   "outputs": [],
   "source": [
    "evalica.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "alpha-section",
   "metadata": {},
   "source": [
    "## Pairwise Comparisons"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e647667c-adb8-4f16-9b7c-993cf2739c2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read every column as a string so pandas does not coerce the values.\n",
    "df_food = pd.read_csv(\n",
    "    \"https://raw.githubusercontent.com/dustalov/evalica/0893fd0f1e8107b2d62fd6c5816b55b417c1a050/food.csv\",\n",
    "    dtype=str,\n",
    ")\n",
    "\n",
    "# Translate the textual outcome labels into the corresponding Winner members;\n",
    "# any label missing from the mapping becomes NaN.\n",
    "df_food[\"winner\"] = df_food[\"winner\"].map(\n",
    "    {\n",
    "        \"left\": Winner.X,\n",
    "        \"right\": Winner.Y,\n",
    "        \"tie\": Winner.Draw,\n",
    "    },\n",
    ")\n",
    "\n",
    "df_food.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b96c16f2-b364-4974-aa41-085564d40681",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_food[\"left_id\"], df_food[\"right_id\"], index = evalica.indexing(df_food[\"left\"], df_food[\"right\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ec6ea649-04a5-4abb-961e-63afa4dfbc77",
   "metadata": {},
   "outputs": [],
   "source": [
    "matrices = evalica.matrices(df_food[\"left_id\"], df_food[\"right_id\"], df_food[\"winner\"], index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42da86e0-c2c6-42c4-9c5e-84d56587fdea",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(matrices.win_matrix, index=index, columns=index)  # win matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5dc51f6-bca7-4153-8265-18c019e8a639",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(matrices.tie_matrix, index=index, columns=index)  # tie matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a75c4442-934a-47f1-8e19-18b73e436fcb",
   "metadata": {},
   "outputs": [],
   "source": [
    "count_result = evalica.counting(df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"])\n",
    "count_result.scores.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce104025-26a4-4a8e-b2dd-1e97ede5592f",
   "metadata": {},
   "outputs": [],
   "source": [
    "avr_result = evalica.average_win_rate(df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"])\n",
    "avr_result.scores.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d561c08e-4a52-435e-a847-b6adf44ab543",
   "metadata": {},
   "outputs": [],
   "source": [
    "bt_result = evalica.bradley_terry(df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"])\n",
    "bt_result.scores.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64bf8f2b-a08f-46b9-b4df-e5d58d8c8a50",
   "metadata": {},
   "outputs": [],
   "source": [
    "newman_result = evalica.newman(df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"])\n",
    "newman_result.scores.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35cdcc91-d182-4d1c-842e-251052defcd4",
   "metadata": {},
   "outputs": [],
   "source": [
    "eigen_result = evalica.eigen(df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"])\n",
    "eigen_result.scores.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39c5d898-944c-472d-abc2-553c54503adf",
   "metadata": {},
   "outputs": [],
   "source": [
    "elo_result = evalica.elo(df_food[\"left\"], df_food[\"right\"], df_food[\"winner\"])\n",
    "elo_result.scores.to_frame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66e1a1c5-b32d-4988-92b3-7549fbad3845",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_bt_pairwise = evalica.pairwise_frame(bt_result.scores)\n",
    "\n",
    "df_bt_pairwise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e7dba72-29d8-4835-b290-2fbccd0d59de",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = px.imshow(df_bt_pairwise, color_continuous_scale=\"RdBu\", text_auto=\".2f\")\n",
    "fig.update_layout(xaxis_title=\"Loser\", yaxis_title=\"Winner\", xaxis_side=\"top\")\n",
    "# Plotly hover labels use <br> to start a new line.\n",
    "fig.update_traces(hovertemplate=\"Winner: %{y}<br>Loser: %{x}<br>Fraction of Wins: %{z}\")\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2310832c-3d5b-42e7-8032-728786abcddc",
   "metadata": {},
   "outputs": [],
   "source": [
    "bootstrap_result = bootstrap(\n",
    "    bradley_terry,\n",
    "    df_food[\"left\"],\n",
    "    df_food[\"right\"],\n",
    "    df_food[\"winner\"],\n",
    "    n_resamples=10,\n",
    "    random_state=42,\n",
    ")\n",
    "\n",
    "# Reshape the bootstrap distribution to long format: one row per (Item, Score) pair.\n",
    "df_melted = bootstrap_result.distribution.melt(var_name=\"Item\", value_name=\"Score\")\n",
    "\n",
    "df_melted.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ed986fb-c150-4d27-9c6f-a847adab48d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = px.box(df_melted, x=\"Score\", y=\"Item\", color=\"Item\", title=\"Bradley–Terry Bootstrap Scores\")\n",
    "# Plotly hover labels use <br> to start a new line.\n",
    "fig.update_traces(hovertemplate=\"%{y}<br>Score: %{x:.3f}\")\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2284a8f6-06dc-4e8c-85ba-2dedd5835b9b",
   "metadata": {},
   "source": [
    "## Inter-Rater Reliability"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "alpha-data",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_codings = pd.read_csv(\n",
    "    \"https://raw.githubusercontent.com/dustalov/evalica/d356c3988fdf9c1db249767413a7a8a1f49d64c0/codings.csv\",\n",
    "    header=None,\n",
    "    dtype=str,\n",
    ")\n",
    "\n",
    "df_codings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "alpha-comparison",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute alpha once per distance setting and collect the values by name.\n",
    "distances = [\"nominal\", \"ordinal\", \"interval\", \"ratio\"]\n",
    "alpha_values = {dist: evalica.alpha(df_codings, distance=dist).alpha for dist in distances}  # type: ignore[arg-type]\n",
    "\n",
    "pd.Series(alpha_values, name=\"alpha\").to_frame()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e49c0e48-d286-40b1-b6c8-3cfaf2d8920c",
   "metadata": {},
   "source": [
    "### Confidence Intervals\n",
    "\n",
    "Evalica can also compute confidence intervals for Krippendorff's alpha using bootstrapping."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7115f1ba-dd54-4d6c-8fd9-861da955f24e",
   "metadata": {},
   "outputs": [],
   "source": [
    "alpha_bootstrap_result = alpha_bootstrap(\n",
    "    df_codings,\n",
    "    distance=\"nominal\",\n",
    "    n_resamples=1000,\n",
    "    confidence_level=0.95,\n",
    "    random_state=42,\n",
    ")\n",
    "\n",
    "fig = px.histogram(\n",
    "    alpha_bootstrap_result.distribution,\n",
    "    nbins=50,\n",
    "    title=\"Krippendorff's Alpha Bootstrap Distribution\",\n",
    "    labels={\"value\": \"Alpha\", \"count\": \"Frequency\"},\n",
    ")\n",
    "\n",
    "# Overlay the point estimate and both interval bounds on the histogram.\n",
    "fig.add_vline(x=alpha_bootstrap_result.alpha, line_dash=\"dash\", line_color=\"red\", annotation_text=\"Point Estimate\")\n",
    "fig.add_vline(x=alpha_bootstrap_result.low, line_dash=\"dot\", line_color=\"blue\", annotation_text=\"Lower Bound\")\n",
    "fig.add_vline(x=alpha_bootstrap_result.high, line_dash=\"dot\", line_color=\"blue\", annotation_text=\"Upper Bound\")\n",
    "fig.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.14.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}