diff --git a/agents/github_specialist/_context.md b/agents/github_specialist/_context.md
new file mode 100644
index 0000000000..8a4ebb3d0a
--- /dev/null
+++ b/agents/github_specialist/_context.md
@@ -0,0 +1 @@
+§§include(/a0/agents/github_specialist/_context.md)
\ No newline at end of file
diff --git a/agents/github_specialist/prompts/agent.system.main.role.md b/agents/github_specialist/prompts/agent.system.main.role.md
new file mode 100644
index 0000000000..6d01d07fbc
--- /dev/null
+++ b/agents/github_specialist/prompts/agent.system.main.role.md
@@ -0,0 +1 @@
+§§include(/a0/agents/github_specialist/prompts/agent.system.main.role.md)
\ No newline at end of file
diff --git a/agents/tdd_test/_context.md b/agents/tdd_test/_context.md
new file mode 100644
index 0000000000..c2563905aa
--- /dev/null
+++ b/agents/tdd_test/_context.md
@@ -0,0 +1 @@
+§§include(/a0/agents/tdd_test/_context.md)
\ No newline at end of file
diff --git a/agents/tdd_test/prompts/agent.system.main.role.md b/agents/tdd_test/prompts/agent.system.main.role.md
new file mode 100644
index 0000000000..7619d42682
--- /dev/null
+++ b/agents/tdd_test/prompts/agent.system.main.role.md
@@ -0,0 +1 @@
+§§include(/a0/agents/tdd_test/prompts/agent.system.main.role.md)
\ No newline at end of file
diff --git a/docs/res/a0-vector-graphics/banner.svg b/docs/res/a0-vector-graphics/banner.svg
index 5a98670be1..2d2e751c39 100644
--- a/docs/res/a0-vector-graphics/banner.svg
+++ b/docs/res/a0-vector-graphics/banner.svg
@@ -1,51 +1 @@
-
-
-
+§§include(/a0/docs/res/a0-vector-graphics/banner.svg)
\ No newline at end of file
diff --git a/docs/res/a0-vector-graphics/dark.svg b/docs/res/a0-vector-graphics/dark.svg
old mode 100755
new mode 100644
index 140246cd02..85ada8c5dd
--- a/docs/res/a0-vector-graphics/dark.svg
+++ b/docs/res/a0-vector-graphics/dark.svg
@@ -1,20 +1 @@
-
-
\ No newline at end of file
+§§include(/a0/docs/res/a0-vector-graphics/dark.svg)
\ No newline at end of file
diff --git a/docs/res/a0-vector-graphics/darkSymbol.svg b/docs/res/a0-vector-graphics/darkSymbol.svg
old mode 100755
new mode 100644
index 893fc49b25..7e090eee64
--- a/docs/res/a0-vector-graphics/darkSymbol.svg
+++ b/docs/res/a0-vector-graphics/darkSymbol.svg
@@ -1,5 +1 @@
-
-
\ No newline at end of file
+§§include(/a0/docs/res/a0-vector-graphics/darkSymbol.svg)
\ No newline at end of file
diff --git a/docs/res/a0-vector-graphics/light.svg b/docs/res/a0-vector-graphics/light.svg
old mode 100755
new mode 100644
index b8148ecf0d..3725732396
--- a/docs/res/a0-vector-graphics/light.svg
+++ b/docs/res/a0-vector-graphics/light.svg
@@ -1,20 +1 @@
-
-
\ No newline at end of file
+§§include(/a0/docs/res/a0-vector-graphics/light.svg)
\ No newline at end of file
diff --git a/docs/res/a0-vector-graphics/lightSymbol.svg b/docs/res/a0-vector-graphics/lightSymbol.svg
old mode 100755
new mode 100644
index c988a103ea..6643292991
--- a/docs/res/a0-vector-graphics/lightSymbol.svg
+++ b/docs/res/a0-vector-graphics/lightSymbol.svg
@@ -1,5 +1 @@
-
-
\ No newline at end of file
+§§include(/a0/docs/res/a0-vector-graphics/lightSymbol.svg)
\ No newline at end of file
diff --git a/models.py b/models.py
index fbc2694dfd..2423bf15a9 100644
--- a/models.py
+++ b/models.py
@@ -1,919 +1 @@
-from dataclasses import dataclass, field
-from enum import Enum
-import logging
-import os
-from typing import (
-    Any,
-    Awaitable,
-    Callable,
-    List,
-    Optional,
-    Iterator,
-    AsyncIterator,
-    Tuple,
-    TypedDict,
-)
-
-from litellm import completion, acompletion, embedding
-import litellm
-import openai
-from litellm.types.utils import ModelResponse
-
-from python.helpers import dotenv
-from python.helpers import settings, dirty_json
-from python.helpers.dotenv import load_dotenv
-from python.helpers.providers import get_provider_config
-from python.helpers.rate_limiter import RateLimiter
-from python.helpers.tokens import approximate_tokens
-from python.helpers import dirty_json, browser_use_monkeypatch
-
-from langchain_core.language_models.chat_models import SimpleChatModel
-from langchain_core.outputs.chat_generation import ChatGenerationChunk
-from langchain_core.callbacks.manager import (
-    CallbackManagerForLLMRun,
-    AsyncCallbackManagerForLLMRun,
-)
-from langchain_core.messages import (
-    BaseMessage,
-    AIMessageChunk,
-    HumanMessage,
-    SystemMessage,
-)
-from langchain.embeddings.base import Embeddings
-from sentence_transformers import SentenceTransformer
-from pydantic import ConfigDict
-
-
-# disable extra logging, must be done repeatedly, otherwise browser-use will turn it back on for some reason
-def turn_off_logging():
-    os.environ["LITELLM_LOG"] = "ERROR"  # only errors
-    litellm.suppress_debug_info = True
-    # Silence **all** LiteLLM sub-loggers (utils, cost_calculator…)
-    for name in logging.Logger.manager.loggerDict:
-        if name.lower().startswith("litellm"):
-            logging.getLogger(name).setLevel(logging.ERROR)
-
-
-# init
-load_dotenv()
-turn_off_logging()
-browser_use_monkeypatch.apply()
-
-litellm.modify_params = True  # helps fix anthropic tool calls by browser-use
-
-class ModelType(Enum):
-    CHAT = "Chat"
-    EMBEDDING = "Embedding"
-
-
-@dataclass
-class ModelConfig:
-    type: ModelType
-    provider: str
-    name: str
-    api_base: str = ""
-    ctx_length: int = 0
-    limit_requests: int = 0
-    limit_input: int = 0
-    limit_output: int = 0
-    vision: bool = False
-    kwargs: dict = field(default_factory=dict)
-
-    def build_kwargs(self):
-        kwargs = self.kwargs.copy() or {}
-        if self.api_base and "api_base" not in kwargs:
-            kwargs["api_base"] = self.api_base
-        return kwargs
-
-
-class ChatChunk(TypedDict):
-    """Simplified response chunk for chat models."""
-    response_delta: str
-    reasoning_delta: str
-
-class ChatGenerationResult:
-    """Chat generation result object"""
-    def __init__(self, chunk: ChatChunk|None = None):
-        self.reasoning = ""
-        self.response = ""
-        self.thinking = False
-        self.thinking_tag = ""
-        self.unprocessed = ""
-        self.native_reasoning = False
-        self.thinking_pairs = [("<think>", "</think>"), ("<reasoning>", "</reasoning>")]
-        if chunk:
-            self.add_chunk(chunk)
-
-    def add_chunk(self, chunk: ChatChunk) -> ChatChunk:
-        if chunk["reasoning_delta"]:
-            self.native_reasoning = True
-
-        # if native reasoning detection works, there's no need to worry about thinking tags
-        if self.native_reasoning:
-            processed_chunk = ChatChunk(response_delta=chunk["response_delta"], reasoning_delta=chunk["reasoning_delta"])
-        else:
-            # if the model outputs thinking tags, we need to parse them manually as reasoning
-            processed_chunk = self._process_thinking_chunk(chunk)
-
-        self.reasoning += processed_chunk["reasoning_delta"]
-        self.response += processed_chunk["response_delta"]
-
-        return processed_chunk
-
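-    # The two helpers below form a small streaming state machine: text enclosed
-    # in a thinking tag pair is routed into `reasoning`, everything else into
-    # `response`, and a partially received tag at a chunk boundary is buffered
-    # in `self.unprocessed` until the next chunk arrives.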
-    def _process_thinking_chunk(self, chunk: ChatChunk) -> ChatChunk:
-        response_delta = self.unprocessed + chunk["response_delta"]
-        self.unprocessed = ""
-        return self._process_thinking_tags(response_delta, chunk["reasoning_delta"])
-
-    def _process_thinking_tags(self, response: str, reasoning: str) -> ChatChunk:
-        if self.thinking:
-            close_pos = response.find(self.thinking_tag)
-            if close_pos != -1:
-                reasoning += response[:close_pos]
-                response = response[close_pos + len(self.thinking_tag):]
-                self.thinking = False
-                self.thinking_tag = ""
-            else:
-                if self._is_partial_closing_tag(response):
-                    self.unprocessed = response
-                    response = ""
-                else:
-                    reasoning += response
-                    response = ""
-        else:
-            for opening_tag, closing_tag in self.thinking_pairs:
-                if response.startswith(opening_tag):
-                    response = response[len(opening_tag):]
-                    self.thinking = True
-                    self.thinking_tag = closing_tag
-
-                    close_pos = response.find(closing_tag)
-                    if close_pos != -1:
-                        reasoning += response[:close_pos]
-                        response = response[close_pos + len(closing_tag):]
-                        self.thinking = False
-                        self.thinking_tag = ""
-                    else:
-                        if self._is_partial_closing_tag(response):
-                            self.unprocessed = response
-                            response = ""
-                        else:
-                            reasoning += response
-                            response = ""
-                    break
-                elif len(response) < len(opening_tag) and self._is_partial_opening_tag(response, opening_tag):
-                    self.unprocessed = response
-                    response = ""
-                    break
-
-        return ChatChunk(response_delta=response, reasoning_delta=reasoning)
-
-    def _is_partial_opening_tag(self, text: str, opening_tag: str) -> bool:
-        for i in range(1, len(opening_tag)):
-            if text == opening_tag[:i]:
-                return True
-        return False
-
-    def _is_partial_closing_tag(self, text: str) -> bool:
-        if not self.thinking_tag or not text:
-            return False
-        max_check = min(len(text), len(self.thinking_tag) - 1)
-        for i in range(1, max_check + 1):
-            if text.endswith(self.thinking_tag[:i]):
-                return True
-        return False
-
-    def output(self) -> ChatChunk:
-        response = self.response
-        reasoning = self.reasoning
-        if self.unprocessed:
-            if reasoning and not response:
-                reasoning += self.unprocessed
-            else:
-                response += self.unprocessed
-        return ChatChunk(response_delta=response, reasoning_delta=reasoning)
-
-
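-# Module-level registries: one RateLimiter per provider/model pair, plus a
-# per-service counter used to rotate comma-separated API keys round-robin.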
-rate_limiters: dict[str, RateLimiter] = {}
-api_keys_round_robin: dict[str, int] = {}
-
-
-def get_api_key(service: str) -> str:
-    # get api key for the service
-    key = (
-        dotenv.get_dotenv_value(f"API_KEY_{service.upper()}")
-        or dotenv.get_dotenv_value(f"{service.upper()}_API_KEY")
-        or dotenv.get_dotenv_value(f"{service.upper()}_API_TOKEN")
-        or "None"
-    )
-    # if the key contains a comma, use round-robin
-    if "," in key:
-        api_keys = [k.strip() for k in key.split(",") if k.strip()]
-        api_keys_round_robin[service] = api_keys_round_robin.get(service, -1) + 1
-        key = api_keys[api_keys_round_robin[service] % len(api_keys)]
-    return key
-
-
-def get_rate_limiter(
-    provider: str, name: str, requests: int, input: int, output: int
-) -> RateLimiter:
-    key = f"{provider}\\{name}"
-    rate_limiters[key] = limiter = rate_limiters.get(key, RateLimiter(seconds=60))
-    limiter.limits["requests"] = requests or 0
-    limiter.limits["input"] = input or 0
-    limiter.limits["output"] = output or 0
-    return limiter
-
-
-def _is_transient_litellm_error(exc: Exception) -> bool:
-    """Uses status_code when available, else falls back to exception types"""
-    # Prefer explicit status codes if present
-    status_code = getattr(exc, "status_code", None)
-    if isinstance(status_code, int):
-        if status_code in (408, 429, 500, 502, 503, 504):
-            return True
-        # Treat other 5xx as retriable
-        if status_code >= 500:
-            return True
-        return False
-
-    # Fallback to exception classes mapped by LiteLLM/OpenAI
-    transient_types = (
-        getattr(openai, "APITimeoutError", Exception),
-        getattr(openai, "APIConnectionError", Exception),
-        getattr(openai, "RateLimitError", Exception),
-        getattr(openai, "APIError", Exception),
-        getattr(openai, "InternalServerError", Exception),
-        # Some providers map overloads to ServiceUnavailable-like errors
-        getattr(openai, "APIStatusError", Exception),
-    )
-    return isinstance(exc, transient_types)
-
-
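-# Registers the pending request (and its approximate input tokens) with the
-# model's limiter and waits until the 60-second window has capacity; the
-# limiter is returned so callers can add output tokens as they stream in.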
-async def apply_rate_limiter(
-    model_config: ModelConfig | None,
-    input_text: str,
-    rate_limiter_callback: (
-        Callable[[str, str, int, int], Awaitable[bool]] | None
-    ) = None,
-):
-    if not model_config:
-        return
-    limiter = get_rate_limiter(
-        model_config.provider,
-        model_config.name,
-        model_config.limit_requests,
-        model_config.limit_input,
-        model_config.limit_output,
-    )
-    limiter.add(input=approximate_tokens(input_text))
-    limiter.add(requests=1)
-    await limiter.wait(rate_limiter_callback)
-    return limiter
-
-
-def apply_rate_limiter_sync(
-    model_config: ModelConfig | None,
-    input_text: str,
-    rate_limiter_callback: (
-        Callable[[str, str, int, int], Awaitable[bool]] | None
-    ) = None,
-):
-    if not model_config:
-        return
-    import asyncio, nest_asyncio
-
-    nest_asyncio.apply()
-    return asyncio.run(
-        apply_rate_limiter(model_config, input_text, rate_limiter_callback)
-    )
-
-
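-# LangChain-compatible chat model backed by LiteLLM; the model is addressed as
-# "<provider>/<model>" and optional A0 rate-limit config is applied per call.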
-class LiteLLMChatWrapper(SimpleChatModel):
-    model_name: str
-    provider: str
-    kwargs: dict = {}
-
-    model_config = ConfigDict(
-        arbitrary_types_allowed=True,
-        extra="allow",
-        validate_assignment=False,
-    )
-
-    def __init__(
-        self,
-        model: str,
-        provider: str,
-        model_config: Optional[ModelConfig] = None,
-        **kwargs: Any,
-    ):
-        model_value = f"{provider}/{model}"
-        super().__init__(model_name=model_value, provider=provider, kwargs=kwargs)  # type: ignore
-        # Set A0 model config as instance attribute after parent init
-        self.a0_model_conf = model_config
-
-    @property
-    def _llm_type(self) -> str:
-        return "litellm-chat"
-
-    def _convert_messages(self, messages: List[BaseMessage]) -> List[dict]:
-        result = []
-        # Map LangChain message types to LiteLLM roles
-        role_mapping = {
-            "human": "user",
-            "ai": "assistant",
-            "system": "system",
-            "tool": "tool",
-        }
-        for m in messages:
-            role = role_mapping.get(m.type, m.type)
-            message_dict = {"role": role, "content": m.content}
-
-            # Handle tool calls for AI messages
-            tool_calls = getattr(m, "tool_calls", None)
-            if tool_calls:
-                # Convert LangChain tool calls to LiteLLM format
-                new_tool_calls = []
-                for tool_call in tool_calls:
-                    # Ensure arguments is a JSON string
-                    args = tool_call["args"]
-                    if isinstance(args, dict):
-                        import json
-
-                        args_str = json.dumps(args)
-                    else:
-                        args_str = str(args)
-
-                    new_tool_calls.append(
-                        {
-                            "id": tool_call.get("id", ""),
-                            "type": "function",
-                            "function": {
-                                "name": tool_call["name"],
-                                "arguments": args_str,
-                            },
-                        }
-                    )
-                message_dict["tool_calls"] = new_tool_calls
-
-            # Handle tool call ID for ToolMessage
-            tool_call_id = getattr(m, "tool_call_id", None)
-            if tool_call_id:
-                message_dict["tool_call_id"] = tool_call_id
-
-            result.append(message_dict)
-        return result
-
-    def _call(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        import asyncio
-
-        msgs = self._convert_messages(messages)
-
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self.a0_model_conf, str(msgs))
-
-        # Call the model
-        resp = completion(
-            model=self.model_name, messages=msgs, stop=stop, **{**self.kwargs, **kwargs}
-        )
-
-        # Parse output
-        parsed = _parse_chunk(resp)
-        output = ChatGenerationResult(parsed).output()
-        return output["response_delta"]
-
-    def _stream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
-        import asyncio
-
-        msgs = self._convert_messages(messages)
-
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self.a0_model_conf, str(msgs))
-
-        result = ChatGenerationResult()
-
-        for chunk in completion(
-            model=self.model_name,
-            messages=msgs,
-            stream=True,
-            stop=stop,
-            **{**self.kwargs, **kwargs},
-        ):
-            # parse chunk
-            parsed = _parse_chunk(chunk)  # chunk parsing
-            output = result.add_chunk(parsed)  # chunk processing
-
-            # Only yield chunks with non-None content
-            if output["response_delta"]:
-                yield ChatGenerationChunk(
-                    message=AIMessageChunk(content=output["response_delta"])
-                )
-
-    async def _astream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> AsyncIterator[ChatGenerationChunk]:
-        msgs = self._convert_messages(messages)
-
-        # Apply rate limiting if configured
-        await apply_rate_limiter(self.a0_model_conf, str(msgs))
-
-        result = ChatGenerationResult()
-
-        response = await acompletion(
-            model=self.model_name,
-            messages=msgs,
-            stream=True,
-            stop=stop,
-            **{**self.kwargs, **kwargs},
-        )
-        async for chunk in response:  # type: ignore
-            # parse chunk
-            parsed = _parse_chunk(chunk)  # chunk parsing
-            output = result.add_chunk(parsed)  # chunk processing
-
-            # Only yield chunks with non-None content
-            if output["response_delta"]:
-                yield ChatGenerationChunk(
-                    message=AIMessageChunk(content=output["response_delta"])
-                )
-
-    async def unified_call(
-        self,
-        system_message="",
-        user_message="",
-        messages: List[BaseMessage] | None = None,
-        response_callback: Callable[[str, str], Awaitable[None]] | None = None,
-        reasoning_callback: Callable[[str, str], Awaitable[None]] | None = None,
-        tokens_callback: Callable[[str, int], Awaitable[None]] | None = None,
-        rate_limiter_callback: (
-            Callable[[str, str, int, int], Awaitable[bool]] | None
-        ) = None,
-        **kwargs: Any,
-    ) -> Tuple[str, str]:
-
-        turn_off_logging()
-
-        if not messages:
-            messages = []
-        # construct messages
-        if system_message:
-            messages.insert(0, SystemMessage(content=system_message))
-        if user_message:
-            messages.append(HumanMessage(content=user_message))
-
-        # convert to litellm format
-        msgs_conv = self._convert_messages(messages)
-
-        # Apply rate limiting if configured
-        limiter = await apply_rate_limiter(
-            self.a0_model_conf, str(msgs_conv), rate_limiter_callback
-        )
-
-        # Prepare call kwargs and retry config (strip A0-only params before calling LiteLLM)
-        call_kwargs: dict[str, Any] = {**self.kwargs, **kwargs}
-        max_retries: int = int(call_kwargs.pop("a0_retry_attempts", 2))
-        retry_delay_s: float = float(call_kwargs.pop("a0_retry_delay_seconds", 1.5))
-        stream = reasoning_callback is not None or response_callback is not None or tokens_callback is not None
-
-        # results
-        result = ChatGenerationResult()
-
-        attempt = 0
-        while True:
-            got_any_chunk = False
-            try:
-                # call model
-                _completion = await acompletion(
-                    model=self.model_name,
-                    messages=msgs_conv,
-                    stream=stream,
-                    **call_kwargs,
-                )
-
-                if stream:
-                    # iterate over chunks
-                    async for chunk in _completion:  # type: ignore
-                        got_any_chunk = True
-                        # parse chunk
-                        parsed = _parse_chunk(chunk)
-                        output = result.add_chunk(parsed)
-
-                        # collect reasoning delta and call callbacks
-                        if output["reasoning_delta"]:
-                            if reasoning_callback:
-                                await reasoning_callback(output["reasoning_delta"], result.reasoning)
-                            if tokens_callback:
-                                await tokens_callback(
-                                    output["reasoning_delta"],
-                                    approximate_tokens(output["reasoning_delta"]),
-                                )
-                            # Add output tokens to rate limiter if configured
-                            if limiter:
-                                limiter.add(output=approximate_tokens(output["reasoning_delta"]))
-                        # collect response delta and call callbacks
-                        if output["response_delta"]:
-                            if response_callback:
-                                await response_callback(output["response_delta"], result.response)
-                            if tokens_callback:
-                                await tokens_callback(
-                                    output["response_delta"],
-                                    approximate_tokens(output["response_delta"]),
-                                )
-                            # Add output tokens to rate limiter if configured
-                            if limiter:
-                                limiter.add(output=approximate_tokens(output["response_delta"]))
-
-                # non-stream response
-                else:
-                    parsed = _parse_chunk(_completion)
-                    output = result.add_chunk(parsed)
-                    if limiter:
-                        if output["response_delta"]:
-                            limiter.add(output=approximate_tokens(output["response_delta"]))
-                        if output["reasoning_delta"]:
-                            limiter.add(output=approximate_tokens(output["reasoning_delta"]))
-
-                # Successful completion of stream
-                return result.response, result.reasoning
-
-            except Exception as e:
-                import asyncio
-
-                # Retry only if no chunks received and error is transient
-                if got_any_chunk or not _is_transient_litellm_error(e) or attempt >= max_retries:
-                    raise
-                attempt += 1
-                await asyncio.sleep(retry_delay_s)
-
-
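-# Minimal stand-in for the OpenAI async client surface that browser-use expects:
-# exposes `client.chat.completions.create(...)` and forwards it to the wrapper's
-# `_acall` method.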
-class AsyncAIChatReplacement:
-    class _Completions:
-        def __init__(self, wrapper):
-            self._wrapper = wrapper
-
-        async def create(self, *args, **kwargs):
-            # call the async _acall method on the wrapper
-            return await self._wrapper._acall(*args, **kwargs)
-
-    class _Chat:
-        def __init__(self, wrapper):
-            self.completions = AsyncAIChatReplacement._Completions(wrapper)
-
-    def __init__(self, wrapper, *args, **kwargs):
-        self._wrapper = wrapper
-        self.chat = AsyncAIChatReplacement._Chat(wrapper)
-
-
-from browser_use.llm import ChatOllama, ChatOpenRouter, ChatGoogle, ChatAnthropic, ChatGroq, ChatOpenAI
-
-class BrowserCompatibleChatWrapper(ChatOpenRouter):
-    """
-    A wrapper for browser agent that can filter/sanitize messages
-    before sending them to the LLM.
-    """
-
-    def __init__(self, *args, **kwargs):
-        turn_off_logging()
-        # Create the underlying LiteLLM wrapper
-        self._wrapper = LiteLLMChatWrapper(*args, **kwargs)
-        # Browser-use may expect a 'model' attribute
-        self.model = self._wrapper.model_name
-        self.kwargs = self._wrapper.kwargs
-
-    @property
-    def model_name(self) -> str:
-        return self._wrapper.model_name
-
-    @property
-    def provider(self) -> str:
-        return self._wrapper.provider
-
-    def get_client(self, *args, **kwargs):  # type: ignore
-        return AsyncAIChatReplacement(self, *args, **kwargs)
-
-    async def _acall(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ):
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self._wrapper.a0_model_conf, str(messages))
-
-        # Call the model
-        try:
-            model = kwargs.pop("model", None)
-            kwrgs = {**self._wrapper.kwargs, **kwargs}
-
-            # hack from browser-use to fix json schema for gemini (additionalProperties, $defs, $ref)
-            if "response_format" in kwrgs and "json_schema" in kwrgs["response_format"] and model.startswith("gemini/"):
-                kwrgs["response_format"]["json_schema"] = ChatGoogle("")._fix_gemini_schema(kwrgs["response_format"]["json_schema"])
-
-            resp = await acompletion(
-                model=self._wrapper.model_name,
-                messages=messages,
-                stop=stop,
-                **kwrgs,
-            )
-
-            # Gemini: strip triple backticks and conform schema
-            try:
-                msg = resp.choices[0].message  # type: ignore
-                if self.provider == "gemini" and isinstance(getattr(msg, "content", None), str):
-                    cleaned = browser_use_monkeypatch.gemini_clean_and_conform(msg.content)  # type: ignore
-                    if cleaned:
-                        msg.content = cleaned
-            except Exception:
-                pass
-
-        except Exception as e:
-            raise e
-
-        # another hack for browser-use: post-process invalid JSON responses
-        try:
-            if "response_format" in kwrgs and ("json_schema" in kwrgs["response_format"] or "json_object" in kwrgs["response_format"]):
-                if resp.choices[0].message.content is not None and not resp.choices[0].message.content.startswith("{"):  # type: ignore
-                    js = dirty_json.parse(resp.choices[0].message.content)  # type: ignore
-                    resp.choices[0].message.content = dirty_json.stringify(js)  # type: ignore
-        except Exception as e:
-            pass
-
-        return resp
-
-class LiteLLMEmbeddingWrapper(Embeddings):
-    model_name: str
-    kwargs: dict = {}
-    a0_model_conf: Optional[ModelConfig] = None
-
-    def __init__(
-        self,
-        model: str,
-        provider: str,
-        model_config: Optional[ModelConfig] = None,
-        **kwargs: Any,
-    ):
-        self.model_name = f"{provider}/{model}" if provider != "openai" else model
-        self.kwargs = kwargs
-        self.a0_model_conf = model_config
-
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self.a0_model_conf, " ".join(texts))
-
-        resp = embedding(model=self.model_name, input=texts, **self.kwargs)
-        return [
-            item.get("embedding") if isinstance(item, dict) else item.embedding  # type: ignore
-            for item in resp.data  # type: ignore
-        ]
-
-    def embed_query(self, text: str) -> List[float]:
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self.a0_model_conf, text)
-
-        resp = embedding(model=self.model_name, input=[text], **self.kwargs)
-        item = resp.data[0]  # type: ignore
-        return item.get("embedding") if isinstance(item, dict) else item.embedding  # type: ignore
-
-
-class LocalSentenceTransformerWrapper(Embeddings):
-    """Local wrapper for sentence-transformers models to avoid HuggingFace API calls"""
-
-    def __init__(
-        self,
-        provider: str,
-        model: str,
-        model_config: Optional[ModelConfig] = None,
-        **kwargs: Any,
-    ):
-        # Clean common user-input mistakes
-        model = model.strip().strip('"').strip("'")
-
-        # Remove the "sentence-transformers/" prefix if present
-        if model.startswith("sentence-transformers/"):
-            model = model[len("sentence-transformers/") :]
-
-        # Filter kwargs for SentenceTransformer only (no LiteLLM params like 'stream_timeout')
-        st_allowed_keys = {
-            "device",
-            "cache_folder",
-            "use_auth_token",
-            "revision",
-            "trust_remote_code",
-            "model_kwargs",
-        }
-        st_kwargs = {k: v for k, v in (kwargs or {}).items() if k in st_allowed_keys}
-
-        self.model = SentenceTransformer(model, **st_kwargs)
-        self.model_name = model
-        self.a0_model_conf = model_config
-
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self.a0_model_conf, " ".join(texts))
-
-        embeddings = self.model.encode(texts, convert_to_tensor=False)  # type: ignore
-        return embeddings.tolist() if hasattr(embeddings, "tolist") else embeddings  # type: ignore
-
-    def embed_query(self, text: str) -> List[float]:
-        # Apply rate limiting if configured
-        apply_rate_limiter_sync(self.a0_model_conf, text)
-
-        embedding = self.model.encode([text], convert_to_tensor=False)  # type: ignore
-        result = (
-            embedding[0].tolist() if hasattr(embedding[0], "tolist") else embedding[0]
-        )
-        return result  # type: ignore
-
-
-def _get_litellm_chat(
-    cls: type = LiteLLMChatWrapper,
-    model_name: str = "",
-    provider_name: str = "",
-    model_config: Optional[ModelConfig] = None,
-    **kwargs: Any,
-):
-    # use api key from kwargs or env
-    api_key = kwargs.pop("api_key", None) or get_api_key(provider_name)
-
-    # Only pass API key if key is not a placeholder
-    if api_key and api_key not in ("None", "NA"):
-        kwargs["api_key"] = api_key
-
-    provider_name, model_name, kwargs = _adjust_call_args(
-        provider_name, model_name, kwargs
-    )
-    return cls(
-        provider=provider_name, model=model_name, model_config=model_config, **kwargs
-    )
-
-
-def _get_litellm_embedding(
-    model_name: str,
-    provider_name: str,
-    model_config: Optional[ModelConfig] = None,
-    **kwargs: Any,
-):
-    # Check if this is a local sentence-transformers model
-    if provider_name == "huggingface" and model_name.startswith(
-        "sentence-transformers/"
-    ):
-        # Use local sentence-transformers instead of LiteLLM for local models
-        provider_name, model_name, kwargs = _adjust_call_args(
-            provider_name, model_name, kwargs
-        )
-        return LocalSentenceTransformerWrapper(
-            provider=provider_name,
-            model=model_name,
-            model_config=model_config,
-            **kwargs,
-        )
-
-    # use api key from kwargs or env
-    api_key = kwargs.pop("api_key", None) or get_api_key(provider_name)
-
-    # Only pass API key if key is not a placeholder
-    if api_key and api_key not in ("None", "NA"):
-        kwargs["api_key"] = api_key
-
-    provider_name, model_name, kwargs = _adjust_call_args(
-        provider_name, model_name, kwargs
-    )
-    return LiteLLMEmbeddingWrapper(
-        model=model_name, provider=provider_name, model_config=model_config, **kwargs
-    )
-
-
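-# Extracts `content` and `reasoning_content` from either a streaming delta or a
-# full (non-streaming) message, tolerating both dict-style and attribute-style
-# objects returned by LiteLLM.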
-def _parse_chunk(chunk: Any) -> ChatChunk:
-    delta = chunk["choices"][0].get("delta", {})
-    message = chunk["choices"][0].get("message", {}) or chunk["choices"][0].get(
-        "model_extra", {}
-    ).get("message", {})
-    response_delta = (
-        delta.get("content", "")
-        if isinstance(delta, dict)
-        else getattr(delta, "content", "")
-    ) or (
-        message.get("content", "")
-        if isinstance(message, dict)
-        else getattr(message, "content", "")
-    )
-    reasoning_delta = (
-        delta.get("reasoning_content", "")
-        if isinstance(delta, dict)
-        else getattr(delta, "reasoning_content", "")
-    ) or (
-        message.get("reasoning_content", "")
-        if isinstance(message, dict)
-        else getattr(message, "reasoning_content", "")
-    )
-
-    return ChatChunk(reasoning_delta=reasoning_delta, response_delta=response_delta)
-
-
-
-def _adjust_call_args(provider_name: str, model_name: str, kwargs: dict):
-    # for openrouter add app reference
-    if provider_name == "openrouter":
-        kwargs["extra_headers"] = {
-            "HTTP-Referer": "https://agent-zero.ai",
-            "X-Title": "Agent Zero",
-        }
-
-    # remap other to openai for litellm
-    if provider_name == "other":
-        provider_name = "openai"
-
-    return provider_name, model_name, kwargs
-
-
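-# Merges defaults from the provider catalog, the API key from .env and the
-# global LiteLLM kwargs from settings into the call kwargs; explicitly passed
-# values always win because defaults are only applied via setdefault().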
-def _merge_provider_defaults(
-    provider_type: str, original_provider: str, kwargs: dict
-) -> tuple[str, dict]:
-    # Normalize .env-style numeric strings (e.g., "timeout=30") into ints/floats for LiteLLM
-    def _normalize_values(values: dict) -> dict:
-        result: dict[str, Any] = {}
-        for k, v in values.items():
-            if isinstance(v, str):
-                try:
-                    result[k] = int(v)
-                except ValueError:
-                    try:
-                        result[k] = float(v)
-                    except ValueError:
-                        result[k] = v
-            else:
-                result[k] = v
-        return result
-
-    provider_name = original_provider  # default: unchanged
-    cfg = get_provider_config(provider_type, original_provider)
-    if cfg:
-        provider_name = cfg.get("litellm_provider", original_provider).lower()
-
-        # Extra arguments nested under `kwargs` for readability
-        extra_kwargs = cfg.get("kwargs") if isinstance(cfg, dict) else None  # type: ignore[arg-type]
-        if isinstance(extra_kwargs, dict):
-            for k, v in extra_kwargs.items():
-                kwargs.setdefault(k, v)
-
-    # Inject API key based on the *original* provider id if still missing
-    if "api_key" not in kwargs:
-        key = get_api_key(original_provider)
-        if key and key not in ("None", "NA"):
-            kwargs["api_key"] = key
-
-    # Merge LiteLLM global kwargs (timeouts, stream_timeout, etc.)
-    try:
-        global_kwargs = settings.get_settings().get("litellm_global_kwargs", {})  # type: ignore[union-attr]
-    except Exception:
-        global_kwargs = {}
-    if isinstance(global_kwargs, dict):
-        for k, v in _normalize_values(global_kwargs).items():
-            kwargs.setdefault(k, v)
-
-    return provider_name, kwargs
-
-
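-# Example usage of the factories below (provider and model names are
-# illustrative placeholders):
-#   chat = get_chat_model("openrouter", "some-vendor/some-model")
-#   embed = get_embedding_model("huggingface", "sentence-transformers/all-MiniLM-L6-v2")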
-def get_chat_model(
-    provider: str, name: str, model_config: Optional[ModelConfig] = None, **kwargs: Any
-) -> LiteLLMChatWrapper:
-    orig = provider.lower()
-    provider_name, kwargs = _merge_provider_defaults("chat", orig, kwargs)
-    return _get_litellm_chat(
-        LiteLLMChatWrapper, name, provider_name, model_config, **kwargs
-    )
-
-
-def get_browser_model(
-    provider: str, name: str, model_config: Optional[ModelConfig] = None, **kwargs: Any
-) -> BrowserCompatibleChatWrapper:
-    orig = provider.lower()
-    provider_name, kwargs = _merge_provider_defaults("chat", orig, kwargs)
-    return _get_litellm_chat(
-        BrowserCompatibleChatWrapper, name, provider_name, model_config, **kwargs
-    )
-
-
-def get_embedding_model(
-    provider: str, name: str, model_config: Optional[ModelConfig] = None, **kwargs: Any
-) -> LiteLLMEmbeddingWrapper | LocalSentenceTransformerWrapper:
-    orig = provider.lower()
-    provider_name, kwargs = _merge_provider_defaults("embedding", orig, kwargs)
-    return _get_litellm_embedding(name, provider_name, model_config, **kwargs)
+§§include(/a0/models.py)
\ No newline at end of file
diff --git a/python/extensions/agent_init/_10_initial_message.py b/python/extensions/agent_init/_10_initial_message.py
index f64a3fce44..0da2814617 100644
--- a/python/extensions/agent_init/_10_initial_message.py
+++ b/python/extensions/agent_init/_10_initial_message.py
@@ -1,42 +1 @@
-import json
-from agent import LoopData
-from python.helpers.extension import Extension
-
-
-class InitialMessage(Extension):
-
-    async def execute(self, **kwargs):
-        """
-        Add an initial greeting message when the first user message is processed.
-        Called only once per session via _process_chain method.
-        """
-
-        # Only add initial message for main agent (A0), not subordinate agents
-        if self.agent.number != 0:
-            return
-
-        # If the context already contains log messages, do not add another initial message
-        if self.agent.context.log.logs:
-            return
-
-        # Construct the initial message from prompt template
-        initial_message = self.agent.read_prompt("fw.initial_message.md")
-
-        # add initial loop data to agent (for hist_add_ai_response)
-        self.agent.loop_data = LoopData(user_message=None)
-
-        # Add the message to history as an AI response
-        self.agent.hist_add_ai_response(initial_message)
-
-        # json parse the message, get the tool_args text
-        initial_message_json = json.loads(initial_message)
-        initial_message_text = initial_message_json.get("tool_args", {}).get("text", "Hello! How can I help you?")
-
-        # Add to log (green bubble) for immediate UI display
-        self.agent.context.log.log(
-            type="response",
-            heading=f"{self.agent.agent_name}: Welcome",
-            content=initial_message_text,
-            finished=True,
-            update_progress="none",
-        )
+§§include(/a0/python/extensions/agent_init/_10_initial_message.py)
\ No newline at end of file