{ "cells": [ { "cell_type": "markdown", "id": "59428441", "metadata": {}, "source": [ "## Tasks" ] }, { "cell_type": "code", "execution_count": 1, "id": "8c84b475", "metadata": {}, "outputs": [], "source": [ "from dataclasses import dataclass, fields\n", "\n", "@dataclass\n", "class Task:\n", " is_project_private: int\n", " is_user_approved: int\n", " load_packit_yaml: int\n", " set_status: int\n", " \n", " @property\n", " def total(self):\n", " return sum(getattr(self, field.name) for field in fields(self.__class__))\n", " \n", " def __mul__(self, num):\n", " if not isinstance(num, int):\n", " raise TypeError(f\"unsupported operand type(s) for *: {type(self)!r} and {type(num)!r}\")\n", " return Task(*[num * getattr(self, field.name) for field in fields(self.__class__)])\n", " \n", " __rmul__ = __mul__\n", " \n", " def __add__(self, obj):\n", " if not isinstance(obj, Task) and not isinstance(obj, int):\n", " raise TypeError(f\"unsupported operand type(s) for +: {type(self)!r} and {type(obj)!r}\")\n", " if isinstance(obj, int) and obj != 0:\n", " raise TypeError(f\"adding {obj!r} to an {type(self)!r} is not supported\")\n", " if isinstance(obj, int) and obj == 0:\n", " obj = Task(*[0] * len(fields(self.__class__)))\n", " return Task(*[(getattr(self, field.name) + getattr(obj, field.name)) for field in fields(self.__class__)])\n", " \n", " __radd__ = __add__" ] }, { "cell_type": "markdown", "id": "464ee1d0", "metadata": {}, "source": [ "## Estimating the number of API calls" ] }, { "cell_type": "code", "execution_count": 2, "id": "bd8ae47b", "metadata": {}, "outputs": [], "source": [ "from collections import namedtuple\n", "\n", "metrics = [\n", " \"prs_per_hour_per_org\", \n", " \"total_is_project_private\", \n", " \"total_is_user_approved\", \n", " \"total_load_packit_yaml\", \n", " \"total_set_status\",\n", " \"grand_total\",\n", "]\n", "\n", "class APICallMetrics(namedtuple(\"APICallMetrics\", metrics)):\n", " def __repr__(self):\n", " return \"\\n\".join(f\"{field} = {getattr(self, field)}\" for field in self._fields)\n", " \n", "def github_api_calls(\n", " # The number of chroots configured to be built in Copr\n", " chroots=3, \n", " # GitHub server-to-server rate limit per hour\n", " rate_limit=5000,\n", " # Calls to load .packit.yaml.\n", " # The current implementation cycles through a list of 7 files.\n", " # If the config file is .packit.yaml, this is loaded from 2 calls.\n", " # If the config file is packit.yaml, it's loaded from 5 calls.\n", " # The +1 call is to try to find the specfile path.\n", " load_packit_yaml=2+1,\n", " # Whether builds are enabled\n", " builds_enabled=True, \n", " # Whether tests are enabled\n", " tests_enabled=True, \n", " # Load packit.yaml once during the pipeline run\n", " load_packit_yaml_once=False, \n", " # Set all flags (including test ones) when the build status changes\n", " all_flags=True,\n", " # Check if the repo is private only in the beginning\n", " is_private_once=False,\n", "):\n", " build_status_flag = int(builds_enabled)\n", " test_status_flag = int(tests_enabled)\n", " tasks = {\n", " \"PR is updated\": Task(\n", " # https://github.com/packit/packit-service/blob/bc1f66bb82aa7c013bde8b8e670ce7e02f55de41/packit_service/worker/jobs.py#L156\n", " is_project_private=1,\n", " # https://github.com/packit/packit-service/blob/bc1f66bb82aa7c013bde8b8e670ce7e02f55de41/packit_service/worker/allowlist.py#L275\n", " is_user_approved=2,\n", " # https://github.com/packit/packit-service/blob/bc1f66bb82aa7c013bde8b8e670ce7e02f55de41/packit_service/worker/events/event.py#L415\n", " load_packit_yaml=load_packit_yaml,\n", " set_status=chroots * (build_status_flag + test_status_flag)\n", " ),\n", " \"Submit Copr build\": Task(\n", " is_project_private=0,\n", " is_user_approved=0,\n", " load_packit_yaml=0,\n", " set_status=chroots * (build_status_flag + test_status_flag),\n", " ),\n", " }\n", " load_packit_yaml = load_packit_yaml if not load_packit_yaml_once else 0\n", " flags_to_set = (build_status_flag + test_status_flag) if all_flags else build_status_flag\n", " is_private = 0 if is_private_once else 1\n", " task = Task(is_private, 0, load_packit_yaml, flags_to_set)\n", " tasks.update({\n", " \"SRPM build started\": chroots * task,\n", " \"SRPM build finished\": chroots * task,\n", " \"RPM build started\": chroots * task,\n", " \"RPM build finished\": chroots * task,\n", " })\n", " if tests_enabled:\n", " task = Task(is_private, 0, load_packit_yaml, test_status_flag)\n", " tasks.update({\n", " \"Tests started\": chroots * task,\n", " \"Tests finished\": chroots * task,\n", " })\n", " total_is_project_private = sum(task.is_project_private for task in tasks.values())\n", " total_is_user_approved = sum(task.is_user_approved for task in tasks.values())\n", " total_load_packit_yaml = sum(task.load_packit_yaml for task in tasks.values())\n", " total_set_status = sum(task.set_status for task in tasks.values())\n", " grand_total = sum(tasks.values()).total\n", " prs_per_hour_per_org = int(rate_limit / grand_total)\n", " \n", " return APICallMetrics(\n", " prs_per_hour_per_org, \n", " total_is_project_private, \n", " total_is_user_approved, \n", " total_load_packit_yaml, \n", " total_set_status,\n", " grand_total,\n", " )" ] }, { "cell_type": "code", "execution_count": 3, "id": "cc02ce48", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "prs_per_hour_per_org = 41\n", "total_is_project_private = 19\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 57\n", "total_set_status = 42\n", "grand_total = 120" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "github_api_calls(\n", " chroots=3, \n", " rate_limit=5000, \n", " load_packit_yaml=2+1, \n", " builds_enabled=True, \n", " tests_enabled=True\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "id": "453ac2c3", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as pp\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 5, "id": "dd6e3f87", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "labels = [\"Project is private\", \"User is approved\", \"load packit.yaml\", \"set status\"]\n", "sizes = github_api_calls(\n", " chroots=3, \n", " rate_limit=5000, \n", " load_packit_yaml=2+1, \n", " builds_enabled=True, \n", " tests_enabled=True\n", ")[1:-1]\n", "fig1, ax1 = pp.subplots()\n", "ax1.pie(sizes, labels=labels, autopct='%1.1f%%')\n", "ax1.axis('equal')\n", "\n", "pp.show()" ] }, { "cell_type": "markdown", "id": "82c80b3e", "metadata": {}, "source": [ "## A few common scenarios" ] }, { "cell_type": "code", "execution_count": 6, "id": "a0fa3ac4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3 chroots, config is called '.packit.yaml'\n" ] }, { "data": { "text/plain": [ "prs_per_hour_per_org = 41\n", "total_is_project_private = 19\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 57\n", "total_set_status = 42\n", "grand_total = 120" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"3 chroots, config is called '.packit.yaml'\")\n", "github_api_calls(\n", " chroots=3, \n", " rate_limit=5000, \n", " load_packit_yaml=2+1, \n", " builds_enabled=True, \n", " tests_enabled=True\n", ")" ] }, { "cell_type": "code", "execution_count": 7, "id": "8035045b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6 chroots, config is called '.packit.yaml'\n" ] }, { "data": { "text/plain": [ "prs_per_hour_per_org = 21\n", "total_is_project_private = 37\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 111\n", "total_set_status = 84\n", "grand_total = 234" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"6 chroots, config is called '.packit.yaml'\")\n", "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000, \n", " load_packit_yaml=2+1, \n", " builds_enabled=True, \n", " tests_enabled=True\n", ")" ] }, { "cell_type": "code", "execution_count": 8, "id": "a8e19023", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3 chroots, config is called 'packit.yaml'\n" ] }, { "data": { "text/plain": [ "prs_per_hour_per_org = 28\n", "total_is_project_private = 19\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 114\n", "total_set_status = 42\n", "grand_total = 177" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"3 chroots, config is called 'packit.yaml'\")\n", "github_api_calls(\n", " chroots=3, \n", " rate_limit=5000, \n", " load_packit_yaml=5+1, \n", " builds_enabled=True, \n", " tests_enabled=True\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "id": "a7159baf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6 chroots, config is called 'packit.yaml'\n" ] }, { "data": { "text/plain": [ "prs_per_hour_per_org = 14\n", "total_is_project_private = 37\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 222\n", "total_set_status = 84\n", "grand_total = 345" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"6 chroots, config is called 'packit.yaml'\")\n", "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000, \n", " load_packit_yaml=5+1, \n", " builds_enabled=True, \n", " tests_enabled=True\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "id": "a748a141", "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6 chroots, config is called '.packit.yaml', no tests\n" ] }, { "data": { "text/plain": [ "prs_per_hour_per_org = 36\n", "total_is_project_private = 25\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 75\n", "total_set_status = 36\n", "grand_total = 138" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(\"6 chroots, config is called '.packit.yaml', no tests\")\n", "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000, \n", " load_packit_yaml=2+1, \n", " builds_enabled=True, \n", " tests_enabled=False\n", ")" ] }, { "cell_type": "markdown", "id": "42af2a1a", "metadata": {}, "source": [ "## Ways to improve things" ] }, { "cell_type": "markdown", "id": "0c05d101", "metadata": {}, "source": [ "The main issue seems to be that `task.steve_jobs.process_message` re-creates the context of the pipeline by getting information from GitHub: checks if the project is private, if the user is approved to run the pipeline, and load the package config (`.packit.yaml`).\n", "\n", "All this could be avoided by looking up the pipeline when an event arrives (Copr events, testing farm events - when these are received there already needs to be a pipeline created, otherwise they are invalid), and getting this information from the pipeline.\n", "\n", "The only time it makes sense to load this from GitHub is when the pipeline is started, for example when the PR event is received." ] }, { "cell_type": "markdown", "id": "616bb63c", "metadata": {}, "source": [ "Let's pick the worst case from the list above (*6 chroots, config is called 'packit.yaml'*) as a reference, and check how the number of API calls change if we apply the above.\n", "\n", "**Load the package config only once per pipeline run** to increase the number of possible runs from 14 to 38 (~171% increase). This is achived by reducing the number of API calls to get the package config from 222 to 6." ] }, { "cell_type": "code", "execution_count": 11, "id": "c1b001e5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "prs_per_hour_per_org = 38\n", "total_is_project_private = 37\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 6\n", "total_set_status = 84\n", "grand_total = 129" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000,\n", " load_packit_yaml=5+1,\n", " builds_enabled=True,\n", " tests_enabled=True,\n", " load_packit_yaml_once=True)" ] }, { "cell_type": "markdown", "id": "2284d964", "metadata": {}, "source": [ "**Don't update the test flags while the build is in progress**, to get to 47 PRs/hour/org (a 235% increase in total), thanks to shaving of 24 API calls to update the status flags." ] }, { "cell_type": "code", "execution_count": 12, "id": "1cc97054", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "prs_per_hour_per_org = 47\n", "total_is_project_private = 37\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 6\n", "total_set_status = 60\n", "grand_total = 105" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000, \n", " load_packit_yaml=5+1, \n", " builds_enabled=True, \n", " tests_enabled=True, \n", " load_packit_yaml_once=True,\n", " all_flags=False)" ] }, { "cell_type": "markdown", "id": "21c94f9b", "metadata": {}, "source": [ "**Reduce the number of calls to get the package config**, and don't search for the specfile path ([packit/packit#1799](https://github.com/packit/packit/issues/1799)). This is an increase in the possible PRs handled per hour per org of ~250%.\n", "\n", "Reducing the number of calls to get the package config has probably a greater impact when Packit is enabled for all repositories in the org, but only a few of those repositories have a package config present. In this case events from repositories which don't have a package config will consume only 2 API calls (1 to check if the repo is private, and 1 to find out that there is no package config), instead of 9 API calls." ] }, { "cell_type": "code", "execution_count": 13, "id": "5eab470d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "prs_per_hour_per_org = 49\n", "total_is_project_private = 37\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 2\n", "total_set_status = 60\n", "grand_total = 101" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000, \n", " load_packit_yaml=2, \n", " builds_enabled=True, \n", " tests_enabled=True, \n", " load_packit_yaml_once=True, \n", " all_flags=False)" ] }, { "cell_type": "markdown", "id": "be6760ff", "metadata": {}, "source": [ "By **checking if the repo is private only once**, when the event that the PR was updated is received and the pipeline created, a further 55% increase can be achieved, which brings us to a **442% increase compared to the current state**.\n", "\n", "The number of API calls drops from 345 to 65." ] }, { "cell_type": "code", "execution_count": 14, "id": "eba2722a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "prs_per_hour_per_org = 76\n", "total_is_project_private = 1\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 2\n", "total_set_status = 60\n", "grand_total = 65" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "github_api_calls(\n", " chroots=6, \n", " rate_limit=5000, \n", " load_packit_yaml=2, \n", " builds_enabled=True, \n", " tests_enabled=True, \n", " load_packit_yaml_once=True, \n", " all_flags=False, \n", " is_private_once=True)" ] }, { "cell_type": "markdown", "id": "cea66f97", "metadata": {}, "source": [ "The above is a 246% improvement even for the best case scenario, by dropping the number of API calls per pipeline from 120 to 35." ] }, { "cell_type": "code", "execution_count": 15, "id": "d4094d47", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "prs_per_hour_per_org = 142\n", "total_is_project_private = 1\n", "total_is_user_approved = 2\n", "total_load_packit_yaml = 2\n", "total_set_status = 30\n", "grand_total = 35" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "github_api_calls(\n", " chroots=3,\n", " rate_limit=5000,\n", " load_packit_yaml=2,\n", " builds_enabled=True,\n", " tests_enabled=True,\n", " load_packit_yaml_once=True,\n", " all_flags=False,\n", " is_private_once=True)" ] }, { "cell_type": "markdown", "id": "7fed3b7e", "metadata": {}, "source": [ "From this point on, the number of API calls could be further reduced only by having fewer status flags (merging flags for different chroots) or making fewer updates to these flags. Both of these could make the user experiance worse." ] }, { "cell_type": "markdown", "id": "8745cba8", "metadata": {}, "source": [ "Note, that the above assumes that pipelines are never running in parallel, which is probably not the case, for example, when PRs are updated while a previous pipeline is still in progress. In these situations old pipeline runs could still consume from the GitHub API rate limit (and maybe override statuses?). This could be solved by canceling Copr builds and test runs belonging to pipelines which become irrelevant ([packit/packit-service#5](https://github.com/packit/packit-service/issues/5))." ] }, { "cell_type": "code", "execution_count": null, "id": "42a23da2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" } }, "nbformat": 4, "nbformat_minor": 5 }