15:["$","section",null,{"className":"mt-20 pb-10","children":[["$","div",null,{"className":"container-custom-screen","id":"about","children":[["$","div",null,{"className":"flex items-center justify-between","children":[["$","div",null,{"className":"flex-none undefined","children":["$","img",null,{"src":"https://mars-images.imgix.net/1770455531958-1770455528541icon.png?auto=compress&fit=max&w=128","alt":"Statistical Testing for Non-Deterministic AI Agents","className":"rounded-full w-16 h-16 object-cover undefined","loading":"lazy"}]}],["$","$L17",null,{"weekRank":3,"isLaunchEnd":false}]]}],["$","h1",null,{"className":"mt-3 text-slate-100 font-medium","children":"agentrial"}],["$","p",null,{"className":"text-slate-400 text-sm sm:text-base mt-1","children":"Statistical Testing for Non-Deterministic AI Agents"}],["$","div",null,{"className":"text-sm mt-3 flex items-center gap-x-3","children":[["$","$L18",null,{"target":"_blank","href":"https://github.com/alepot55/agentrial?ref=devhunt","className":"py-2.5 px-3 font-medium text-center text-white active:shadow-none rounded-lg shadow bg-slate-700 md:bg-[linear-gradient(179.23deg,_#1E293B_0.66%,_rgba(30,_41,_59,_0)_255.99%)] hover:bg-slate-800 duration-150 flex items-center gap-x-2","children":["Live preview",["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","viewBox":"0 0 20 20","fill":"currentColor","className":"w-5 h-5","children":[["$","path",null,{"fillRule":"evenodd","d":"M4.25 5.5a.75.75 0 00-.75.75v8.5c0 .414.336.75.75.75h8.5a.75.75 0 00.75-.75v-4a.75.75 0 011.5 0v4A2.25 2.25 0 0112.75 17h-8.5A2.25 2.25 0 012 14.75v-8.5A2.25 2.25 0 014.25 4h5a.75.75 0 010 1.5h-5z","clipRule":"evenodd"}],["$","path",null,{"fillRule":"evenodd","d":"M6.194 12.753a.75.75 0 001.06.053L16.5 4.44v2.81a.75.75 0 001.5 0v-4.5a.75.75 0 00-.75-.75h-4.5a.75.75 0 000 1.5h2.553l-9.056 8.194a.75.75 0 00-.053 1.06z","clipRule":"evenodd"}]]}]]}],["$","$L19",null,{"productId":7867,"count":0,"launchDate":"2027-06-29","launchEnd":"2027-07-05T23:59:59+00:00"}]]}],["$","div",null,{"className":"mt-10","children":["$","$L1a",null,{"productId":7867,"owner":{"id":"efa1dd37-15d7-4733-b36f-32a64c15d550","updated_at":"2026-02-06T13:22:43.479+00:00","username":"AlessandroPotenza_b45","full_name":"Alessandro Potenza","avatar_url":"https://avatars.githubusercontent.com/u/61759069?v=4","website_url":"","headline":"","about":"","twitter":null,"social_url":"www.linkedin.com/in/alepot55"}}]}]]}],["$","div",null,{"className":"border-b border-slate-800 text-sm text-slate-400 font-medium mt-20 sticky pt-2 top-[3.75rem] z-10 bg-slate-900","children":["$","ul",null,{"className":"flex items-center gap-x-4 overflow-auto w-full container-custom-screen","children":[["$","$a",null,{"fallback":null,"children":["$","$Lb",null,{"children":["$","$L1b",null,{"hash":"#","children":"About product"}]}]}],["$","$a",null,{"fallback":null,"children":["$","$Lb",null,{"children":["$","$L1b",null,{"hash":"#comments","children":"Comments"}]}]}],["$","$a",null,{"fallback":null,"children":["$","$Lb",null,{"children":["$","$L1b",null,{"hash":"#details","children":"Launch details"}]}]}],["$","$a",null,{"fallback":null,"children":["$","$Lb",null,{"children":["$","$L1b",null,{"hash":"#launches","children":"Related launches"}]}]}]]}]}],["$","div",null,{"className":"space-y-20","children":[["$","div",null,{"children":["$","div",null,{"className":"relative overflow-hidden pb-12","children":[["$","div",null,{"className":"absolute top-0 w-full h-[100px] opacity-40 bg-[linear-gradient(180deg,_rgba(124,_58,_237,_0.06)_0%,_rgba(72,_58,_237,_0)_100%)]"}],["$","div",null,{"className":"relative container-custom-screen mt-12","children":[["$","div",null,{"className":"prose text-slate-100 whitespace-pre-wrap","dangerouslySetInnerHTML":{"__html":"

agentrial is an open-source Python framework that runs your AI agent N times on each test case and gives you confidence intervals instead of pass/fail.

Your agent passed 10/10 runs? Wilson CI says the true reliability could be as low as 72%. agentrial catches that.

Multi-trial evaluation — Wilson confidence intervals on pass rates, bootstrap resampling on cost/latency
Failure attribution — Fisher exact test pinpoints which step in your pipeline breaks
Regression detection — compare versions in CI/CD, exit code 1 blocks the PR on significant drops
Framework-agnostic — adapters for LangGraph, CrewAI, AutoGen, Pydantic AI, OpenAI Agents SDK, smolagents, or any Python callable
Local-first — no accounts, no telemetry, no cloud. MIT license.

pip install agentrial

\n"}}],["$","div",null,{"className":"mt-6 flex flex-wrap gap-3 items-center","children":[["$","h3",null,{"className":"text-sm text-slate-400 font-medium","children":"Classified in"}],["$","ul",null,{"className":"flex flex-wrap items-center gap-3","children":[["$","li",null,{"className":"","children":["$","$L18",null,{"href":"/tools/cli","className":"flex-none text-sm text-slate-400 font-medium border border-slate-700 bg-slate-800/50 rounded-full px-3 py-1","children":"CLI"}]}],["$","li",null,{"className":"","children":["$","$L18",null,{"href":"/tools/open-source","className":"flex-none text-sm text-slate-400 font-medium border border-slate-700 bg-slate-800/50 rounded-full px-3 py-1","children":"Open Source"}]}],["$","li",null,{"className":"","children":["$","$L18",null,{"href":"/tools/ai","className":"flex-none text-sm text-slate-400 font-medium border border-slate-700 bg-slate-800/50 rounded-full px-3 py-1","children":"AI"}]}],["$","li",null,{"className":"","children":["$","$L18",null,{"href":"/tools/monitoring","className":"flex-none text-sm text-slate-400 font-medium border border-slate-700 bg-slate-800/50 rounded-full px-3 py-1","children":"Monitoring"}]}]]}]]}]]}],["$","div",null,{"className":"max-w-screen-2xl mt-10 mx-auto sm:px-8","children":["$","$L1c",null,{"assets":["https://mars-images.imgix.net/1770455938781-1770455938559Screenshot-from-2026-02-07-10-18-05.png?auto=compress&fit=max&w=750","https://mars-images.imgix.net/1770455942009-1770455941932Screenshot-from-2026-02-07-10-18-25.png?auto=compress&fit=max&w=750","https://mars-images.imgix.net/1770455944142-1770455944072Screenshot-from-2026-02-07-10-18-43.png?auto=compress&fit=max&w=750"],"alt":"agentrial","src":"https://app.paracast.io/api/getPromoVideoFromSiteUrl/?project_url=https://github.com/alepot55/agentrial","children":[["$","li",null,{"className":"flex-none snap-normal snap-start py-3 pointer-events-none","children":["$","img",null,{"src":"https://mars-images.imgix.net/1770455938781-1770455938559Screenshot-from-2026-02-07-10-18-05.png?auto=compress&w=750","alt":"agentrial","className":"w-[459px] h-auto rounded-lg object-contain "}]}],["$","li",null,{"className":"flex-none snap-normal snap-start py-3 pointer-events-none","children":["$","img",null,{"src":"https://mars-images.imgix.net/1770455942009-1770455941932Screenshot-from-2026-02-07-10-18-25.png?auto=compress&w=750","alt":"agentrial","className":"w-[459px] h-auto rounded-lg object-contain "}]}],["$","li",null,{"className":"flex-none snap-normal snap-start py-3 pointer-events-none","children":["$","img",null,{"src":"https://mars-images.imgix.net/1770455944142-1770455944072Screenshot-from-2026-02-07-10-18-43.png?auto=compress&w=750","alt":"agentrial","className":"w-[459px] h-auto rounded-lg object-contain "}]}]]}]}]]}]}],["$","$L1d",null,{"productId":"efa1dd37-15d7-4733-b36f-32a64c15d550","comments":[],"slug":"agentrial"}],["$","div",null,{"className":"container-custom-screen","id":"details","children":[["$","h3",null,{"className":"text-slate-50 font-medium","children":"About this launch"}],["$","p",null,{"className":"text-slate-300 mt-6","children":["agentrial"," ","by"," ",["$","$L18",null,{"href":"/@AlessandroPotenza_b45","className":"text-orange-500 hover:text-orange-400 duration-150","children":"Alessandro Potenza"}]," ","Will be launched ","June 29th 2027","."]}],null]}],["$","div",null,{"className":"container-custom-screen","id":"launches","children":["$","$L1e",null,{}]}],["$","div",null,{"className":"container-custom-screen","id":"launches","children":[["$","h3",null,{"className":"text-slate-50 font-medium","children":"Trending launches"}],["$","$a",null,{"fallback":null,"children":["$","$Lb",null,{"children":["$","$L1f",null,{}]}]}]]}]]}]]}]